In [15]:
# =========================
# Minimaler Endlauf: Profilbericht α/β (zweistufig) – MIT DETAILWERTEN
# =========================

import os, numpy as np, pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from tensorflow.keras.models import load_model

# --- Pfade ---
DATA_PATH   = "C:/Users/JonasNiehus/Documents/Masterarbeit/Evaluation/Masterarbeit_Guetekriterien-sichere-und-interpetierbare-Hochrisiko-KI-Systeme/data/german.data"
MODEL_PATH  = "C:/Users/JonasNiehus/Documents/Masterarbeit/Evaluation/Masterarbeit_Guetekriterien-sichere-und-interpetierbare-Hochrisiko-KI-Systeme/models/german_credit_model.keras"
PROFILES_CSV= "C:/Users/JonasNiehus/Documents/Masterarbeit/Evaluation/Masterarbeit_Guetekriterien-sichere-und-interpetierbare-Hochrisiko-KI-Systeme/results/diverse_profiles.csv"
REPORT_PATH = "C:/Users/JonasNiehus/Documents/Masterarbeit/Evaluation/Masterarbeit_Guetekriterien-sichere-und-interpetierbare-Hochrisiko-KI-Systeme/results/profilbericht_alpha_beta_twostage.md"

# (optional) Details zusätzlich auch als CSV persistieren?
WRITE_ALPHA_DETAILS_CSV = True
WRITE_BETA_DETAILS_CSV  = True
ALPHA_DETAILS_CSV = "c:/Users/JonasNiehus/Documents/Masterarbeit/Evaluation/Masterarbeit_Guetekriterien-sichere-und-interpetierbare-Hochrisiko-KI-Systeme/results/alpha_details.csv"
BETA_DETAILS_CSV  = "c:/Users/JonasNiehus/Documents/Masterarbeit/Evaluation/Masterarbeit_Guetekriterien-sichere-und-interpetierbare-Hochrisiko-KI-Systeme/results/beta_details.csv"

# =========================
# Laden & Vorverarbeitung
# =========================
df = pd.read_csv(DATA_PATH, header=None, sep=r"\s+")
df.columns = [
    "Status_des_Girokontos", "Dauer_in_Monaten", "Kreditgeschichte", "Kreditverwendungszweck",
    "Kreditbetrag", "Sparkonto_Wertpapiere", "Beschäftigt_seit", "Ratenhöhe",
    "Familienstand_Geschlecht", "Weitere_Bürgen_Schuldner", "Wohnsitzdauer", "Vermögen", "Alter",
    "Andere_Ratenverpflichtungen", "Wohnsituation", "Anzahl_bestehender_Kredite", "Beruf",
    "Unterhaltspflichtige_Personen", "Telefon", "Ausländischer_Arbeiter", "Ziel"
]
df["Ziel"] = df["Ziel"].map({1: 1, 2: 0}).astype(int)

X_all = df.drop(columns=["Ziel"])
y_all = df["Ziel"].values

# Numerisch/Kategorisch wie in deinem Setup
numerical_cols = [
    "Dauer_in_Monaten", "Kreditbetrag", "Ratenhöhe", "Wohnsitzdauer",
    "Alter", "Anzahl_bestehender_Kredite", "Unterhaltspflichtige_Personen"
]
categorical_cols = [c for c in X_all.columns if c not in numerical_cols]

preprocessor = ColumnTransformer([
    ("num", StandardScaler(), numerical_cols),
    ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_cols),
])
preprocessor.fit(X_all)

# --- Modell laden ---
if not os.path.exists(MODEL_PATH):
    raise FileNotFoundError(f"Modelldatei fehlt: {MODEL_PATH}")
model = load_model(MODEL_PATH)

# --- Profile laden & Spalten ausrichten ---
profiles_df = pd.read_csv(PROFILES_CSV)
extra_cols = [c for c in profiles_df.columns if c not in X_all.columns]
if extra_cols:
    profiles_df = profiles_df.drop(columns=extra_cols)
profiles_df = profiles_df[X_all.columns]
profiles_df[numerical_cols] = profiles_df[numerical_cols].apply(pd.to_numeric, errors="coerce").astype(float)

# =========================
# Feste Feature-Gruppen (gemäß deiner schriftlichen Definition)
# =========================
econ_num = ['Kreditbetrag', 'Dauer_in_Monaten', 'Ratenhöhe', 'Anzahl_bestehender_Kredite']
econ_cat = ['Kreditgeschichte', 'Kreditverwendungszweck', 'Sparkonto_Wertpapiere', 'Vermögen',
            'Status_des_Girokontos', 'Beschäftigt_seit', 'Andere_Ratenverpflichtungen', 'Weitere_Bürgen_Schuldner']

socio_num = ['Alter', 'Wohnsitzdauer', 'Unterhaltspflichtige_Personen']
socio_cat = ['Familienstand_Geschlecht', 'Wohnsituation', 'Telefon', 'Ausländischer_Arbeiter', 'Beruf']

# =========================
# α / β – Funktionen (MIT DETAILS)
# =========================
def _predict_label_and_proba(model, preprocessor, row_df, threshold=0.5):
    Xp = preprocessor.transform(row_df)
    proba = float(model.predict(Xp, verbose=0)[0, 0])
    label = int(proba >= threshold)
    return label, proba

def _numeric_grid_around(value, lo, hi, n_steps=9):
    grid = np.linspace(lo, hi, n_steps)
    return [v for v in grid if abs(v - value) > 1e-12]

# ---- α: Notwendigkeit, mit exakten CF-Werten ----
def alpha_test_numeric(model, preprocessor, df_row, feature, bounds, threshold=0.5,
                       n_steps=9, clip_min=None):
    orig_label, orig_proba = _predict_label_and_proba(model, preprocessor, df_row, threshold)
    lo, hi = bounds
    orig_value = float(df_row[feature].iloc[0])
    test_values = _numeric_grid_around(orig_value, lo, hi, n_steps=n_steps)

    best = None
    for v in test_values:
        v_clip = float(max(v, clip_min)) if clip_min is not None else float(v)
        mod_row = df_row.copy()
        mod_row.at[df_row.index[0], feature] = np.float64(v_clip)
        new_label, new_proba = _predict_label_and_proba(model, preprocessor, mod_row, threshold)
        if new_label != orig_label:
            change = abs(v_clip - orig_value)
            if (best is None) or (change < best["delta_abs"]):
                best = {
                    "is_necessary": True,
                    "original_label": int(orig_label),
                    "original_proba": float(orig_proba),
                    "original_value": float(orig_value),
                    "cf_value": float(v_clip),
                    "delta_abs": float(change),
                    "cf_label": int(new_label),
                    "cf_proba": float(new_proba),
                }

    if best is None:
        return {
            "is_necessary": False,
            "original_label": int(orig_label),
            "original_proba": float(orig_proba),
            "original_value": float(orig_value),
            "cf_value": None,
            "delta_abs": None,
            "cf_label": None,
            "cf_proba": None,
        }
    return best

def alpha_test_categorical(model, preprocessor, df_row, feature, all_categories, threshold=0.5):
    orig_label, orig_proba = _predict_label_and_proba(model, preprocessor, df_row, threshold)
    current_cat = df_row[feature].iloc[0]

    for cat in all_categories:
        if cat == current_cat:
            continue
        mod_row = df_row.copy()
        mod_row.at[df_row.index[0], feature] = cat
        new_label, new_proba = _predict_label_and_proba(model, preprocessor, mod_row, threshold)
        if new_label != orig_label:
            return {
                "is_necessary": True,
                "original_label": int(orig_label),
                "original_proba": float(orig_proba),
                "original_value": current_cat,
                "cf_value": cat,
                "delta_abs": None,  # nicht sinnvoll für Kategorien
                "cf_label": int(new_label),
                "cf_proba": float(new_proba),
            }

    return {
        "is_necessary": False,
        "original_label": int(orig_label),
        "original_proba": float(orig_proba),
        "original_value": current_cat,
        "cf_value": None,
        "delta_abs": None,
        "cf_label": None,
        "cf_proba": None,
    }

def run_alpha_for_profiles(
    model, preprocessor, df_all, profiles_df,
    features_numeric, features_categorical,
    threshold=0.5, q_lo=0.05, q_hi=0.95, n_steps=9
):
    if not features_numeric and not features_categorical:
        return pd.DataFrame(index=profiles_df.index), {}

    num_bounds = {feat: (float(df_all[feat].quantile(q_lo)),
                         float(df_all[feat].quantile(q_hi)))
                  for feat in features_numeric}
    cat_values = {feat: sorted(df_all[feat].dropna().unique().tolist())
                  for feat in features_categorical}

    rows, details = [], {}
    for i in profiles_df.index:
        row = profiles_df.loc[[i]]
        row_result = {}
        for feat in features_numeric:
            res = alpha_test_numeric(model, preprocessor, row, feature=feat,
                                     bounds=num_bounds[feat], threshold=threshold,
                                     n_steps=n_steps, clip_min=0.0)
            row_result[feat] = int(res["is_necessary"])
            details[(i, feat)] = res

        for feat in features_categorical:
            res = alpha_test_categorical(model, preprocessor, row, feature=feat,
                                         all_categories=cat_values[feat], threshold=threshold)
            row_result[feat] = int(res["is_necessary"])
            details[(i, feat)] = res

        row_result["profile_index"] = i
        rows.append(row_result)

    results_table = pd.DataFrame(rows).set_index("profile_index").sort_index()
    return results_table, details

# ---- β: Suffizienz, mit fixiertem Wert a + Zählung ----
def _predict_label_and_proba_batch(model, preprocessor, df, threshold=0.5):
    Xp = preprocessor.transform(df)
    proba = model.predict(Xp, verbose=0).reshape(-1)
    labels = (proba >= threshold).astype(int)
    return labels, proba

def beta_test_feature(
    model, preprocessor, df_all, df_row, feature, value=None,
    threshold=0.5, n_samples=2000, random_state=42
):
    rng = np.random.default_rng(random_state)
    y_star, p_star = _predict_label_and_proba(model, preprocessor, df_row, threshold)
    a = df_row[feature].iloc[0] if value is None else value

    sample_idx = rng.integers(0, df_all.shape[0], size=n_samples)
    Z = df_all.iloc[sample_idx].copy()
    if pd.api.types.is_numeric_dtype(df_all[feature]):
        Z[feature] = float(a)
    else:
        Z[feature] = a

    y_hat, proba = _predict_label_and_proba_batch(model, preprocessor, Z, threshold)
    same = (y_hat == y_star).astype(int)
    n_same = int(same.sum())
    n_diff = int(len(same) - n_same)
    beta = float(n_same / len(same))

    return {
        "feature": feature,
        "a": float(a) if pd.api.types.is_numeric_dtype(df_all[feature]) else a,
        "beta": beta,
        "y_star": int(y_star),
        "proba_star": float(p_star),
        "n_samples": int(n_samples),
        "n_same": n_same,
        "n_diff": n_diff,
    }

def run_beta_for_profiles(
    model, preprocessor, df_all, profiles_df, features_numeric, features_categorical,
    threshold=0.5, n_samples=2000, random_state=42
):
    features = (features_numeric or []) + (features_categorical or [])
    if not features:
        return pd.DataFrame(index=profiles_df.index), {}

    rows, details = [], {}
    for i in profiles_df.index:
        row = profiles_df.loc[[i]]
        row_result = {}
        for feat in features:
            res = beta_test_feature(
                model, preprocessor, df_all, row, feat,
                value=None, threshold=threshold,
                n_samples=n_samples, random_state=random_state + i,
            )
            row_result[feat] = res["beta"]
            details[(i, feat)] = res
        row_result["profile_index"] = i
        rows.append(row_result)

    beta_table = pd.DataFrame(rows).set_index("profile_index").sort_index()
    return beta_table, details

# =========================
# Report-Builder (Markdown) – MIT DETAILSPALTEN
# =========================
def _fmt_prob(p):
    if p is None:
        return "—"
    return f"{p:.3f}"

def _fmt_val(v):
    """Robuste Formatierung: Zahlen kompakt, Kategorien/Strings als String."""
    if v is None:
        return "—"
    # Numerisch?
    try:
        # np.bool_ abfangen, weil bool auch float-castbar ist
        if isinstance(v, (bool, np.bool_)):
            return str(v)
        fv = float(v)
        # Unendlichkeiten vermeiden
        if np.isfinite(fv):
            return f"{fv:.4g}"
        else:
            return str(v)
    except (TypeError, ValueError):
        # Nicht numerisch -> als String ausgeben
        return str(v)

def _alpha_detail_cell(alpha_det: dict):
    """
    Kompakte Zelle für α-Details.
    - Für Numeric: orig → cf (Δ=...) | p:o→n | y:o→n
    - Für Kategorial: orig → cf | p:o→n | y:o→n (Δ nicht sinnvoll -> "—")
    """
    if not alpha_det:
        return "—"

    is_nec = alpha_det.get("is_necessary", False)

    if not is_nec:
        return f"orig={_fmt_val(alpha_det.get('original_value'))} | keine CF-Änderung"

    # notwendig -> wir haben einen cf_value
    orig_val = _fmt_val(alpha_det.get("original_value"))
    cf_val   = _fmt_val(alpha_det.get("cf_value"))
    delta    = _fmt_val(alpha_det.get("delta_abs"))  # bei Kategorien "—"
    p_o      = _fmt_prob(alpha_det.get("original_proba"))
    p_n      = _fmt_prob(alpha_det.get("cf_proba"))
    y_o      = alpha_det.get("original_label", "—")
    y_n      = alpha_det.get("cf_label", "—")

    return f"orig={orig_val} → cf={cf_val} (Δ={delta} | p:{p_o}→{p_n} | y:{y_o}→{y_n})"

def _beta_detail_cell(beta_det: dict):
    """
    Kompakte Zelle für β-Details.
    Zeigt den fixierten Wert a (auch kategorial ok), β, same/diff und y*, p*.
    """
    if not beta_det:
        return "—"
    a_val = _fmt_val(beta_det.get("a"))
    beta  = beta_det.get("beta", None)
    beta_s = f"{beta:.4f}" if isinstance(beta, (float, int, np.floating, np.integer)) else "—"
    n_same = beta_det.get("n_same", "—")
    n_diff = beta_det.get("n_diff", "—")
    y_star = beta_det.get("y_star", "—")
    p_star = _fmt_prob(beta_det.get("proba_star"))
    return f"a={a_val} | β={beta_s} | same/diff={n_same}/{n_diff} | y*={y_star} (p={p_star})"


def build_two_stage_profile_report_md(
    alpha_econ: pd.DataFrame,
    alpha_econ_details: dict,
    beta_econ: pd.DataFrame,
    beta_econ_details: dict,
    econ_features_all: list,
    alpha_socio: pd.DataFrame,
    alpha_socio_details: dict,
    beta_socio: pd.DataFrame,
    beta_socio_details: dict,
    socio_features_all: list,
    suff_threshold: float = 0.7,
    round_beta: int = 4,
    out_path: str = REPORT_PATH
):
    # Sicherstellen, dass die Indizes übereinstimmen
    idx_ref = alpha_econ.index
    assert idx_ref.equals(beta_econ.index)
    assert idx_ref.equals(alpha_socio.index)
    assert idx_ref.equals(beta_socio.index)

    econ_feats = [f for f in econ_features_all if f in alpha_econ.columns and f in beta_econ.columns]
    socio_feats = [f for f in socio_features_all if f in alpha_socio.columns and f in beta_socio.columns]

    md = []
    md.append("# Profilbericht (zweistufig): α/β je Profil – Ökonomisch vs. Soziodemographisch\n")
    md.append(f"- **Anzahl Profile:** {len(idx_ref)}")
    md.append(f"- **Hinreichend-Schwelle:** β ≥ {suff_threshold}")
    md.append(f"- **Ökonomische Features:** {', '.join(econ_feats) if econ_feats else '(keine)'}")
    md.append(f"- **Soziodemographische Features:** {', '.join(socio_feats) if socio_feats else '(keine)'}\n")

    # Überblick je Profil (Anzahlen)
    overview_rows = []
    for pid in idx_ref:
        row = {"profile_index": pid}
        row["econ_alpha_cnt"] = int(alpha_econ.loc[pid, econ_feats].sum()) if econ_feats else 0
        row["econ_hinr_cnt"]  = int((beta_econ.loc[pid, econ_feats] >= suff_threshold).sum()) if econ_feats else 0
        row["socio_alpha_cnt"] = int(alpha_socio.loc[pid, socio_feats].sum()) if socio_feats else 0
        row["socio_hinr_cnt"]  = int((beta_socio.loc[pid, socio_feats] >= suff_threshold).sum()) if socio_feats else 0
        overview_rows.append(row)
    overview_df = pd.DataFrame(overview_rows).set_index("profile_index")
    md.append("## Überblick (Anzahl notwendiger / hinreichender Features je Stufe)\n")
    md.append(overview_df.to_markdown())
    md.append("\n---\n")

    # Details je Profil
    md.append("## Details je Profil\n")
    for pid in idx_ref:
        md.append(f"\n### Profil {pid}\n")

        if econ_feats:
            # Tabelle mit α/β + Detail-Spalten
            rows = []
            for feat in econ_feats:
                alpha_det = alpha_econ_details.get((pid, feat), {})
                beta_det  = beta_econ_details.get((pid, feat), {})
                rows.append({
                    "Feature": feat,
                    "alpha_notwendig": int(alpha_econ.loc[pid, feat]),
                    "beta": round(float(beta_econ.loc[pid, feat]), round_beta),
                    f"α-Details (orig→cf)": _alpha_detail_cell(alpha_det),
                    f"β-Details (fixierter Wert)": _beta_detail_cell(beta_det),
                    f"hinreichend (β≥{suff_threshold:.2f})": int(float(beta_econ.loc[pid, feat]) >= suff_threshold),
                })
            df_e = pd.DataFrame(rows)
            md.append("\n**Ökonomische Features**\n")
            md.append(df_e.to_markdown(index=False))

        if socio_feats:
            rows = []
            for feat in socio_feats:
                alpha_det = alpha_socio_details.get((pid, feat), {})
                beta_det  = beta_socio_details.get((pid, feat), {})
                rows.append({
                    "Feature": feat,
                    "alpha_notwendig": int(alpha_socio.loc[pid, feat]),
                    "beta": round(float(beta_socio.loc[pid, feat]), round_beta),
                    f"α-Details (orig→cf)": _alpha_detail_cell(alpha_det),
                    f"β-Details (fixierter Wert)": _beta_detail_cell(beta_det),
                    f"hinreichend (β≥{suff_threshold:.2f})": int(float(beta_socio.loc[pid, feat]) >= suff_threshold),
                })
            df_s = pd.DataFrame(rows)
            md.append("\n**Soziodemographische/sonstige Features**\n")
            md.append(df_s.to_markdown(index=False))

        md.append("\n---")

    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    with open(out_path, "w", encoding="utf-8") as f:
        f.write("\n".join(md))

# =========================
# AUSFÜHRUNG: Stufe 1 & 2 + Bericht
# =========================
alpha_econ,  alpha_econ_details  = run_alpha_for_profiles(
    model, preprocessor, df_all=X_all, profiles_df=profiles_df,
    features_numeric=econ_num, features_categorical=econ_cat,
    threshold=0.5, q_lo=0.05, q_hi=0.95, n_steps=9
)
beta_econ,   beta_econ_details   = run_beta_for_profiles(
    model, preprocessor, df_all=X_all, profiles_df=profiles_df,
    features_numeric=econ_num, features_categorical=econ_cat,
    threshold=0.5, n_samples=2000, random_state=42
)
alpha_socio, alpha_socio_details = run_alpha_for_profiles(
    model, preprocessor, df_all=X_all, profiles_df=profiles_df,
    features_numeric=socio_num, features_categorical=socio_cat,
    threshold=0.5, q_lo=0.05, q_hi=0.95, n_steps=9
)
beta_socio,  beta_socio_details  = run_beta_for_profiles(
    model, preprocessor, df_all=X_all, profiles_df=profiles_df,
    features_numeric=socio_num, features_categorical=socio_cat,
    threshold=0.5, n_samples=2000, random_state=4242
)

# (optional) Details als CSV persistieren – gut für Nachvollziehbarkeit/Audit
if WRITE_ALPHA_DETAILS_CSV:
    rows = []
    for (pid, feat), d in alpha_econ_details.items():
        dd = d.copy(); dd.update({"profile_index": pid, "feature": feat, "gruppe": "oekonomisch"})
        rows.append(dd)
    for (pid, feat), d in alpha_socio_details.items():
        dd = d.copy(); dd.update({"profile_index": pid, "feature": feat, "gruppe": "sozio"})
        rows.append(dd)
    alpha_details_df = pd.DataFrame(rows)
    alpha_details_df.to_csv(ALPHA_DETAILS_CSV, index=False)

if WRITE_BETA_DETAILS_CSV:
    rows = []
    for (pid, feat), d in beta_econ_details.items():
        dd = d.copy(); dd.update({"profile_index": pid, "feature": feat, "gruppe": "oekonomisch"})
        rows.append(dd)
    for (pid, feat), d in beta_socio_details.items():
        dd = d.copy(); dd.update({"profile_index": pid, "feature": feat, "gruppe": "sozio"})
        rows.append(dd)
    beta_details_df = pd.DataFrame(rows)
    beta_details_df.to_csv(BETA_DETAILS_CSV, index=False)

# Report erzeugen (mit Detailspalten)
build_two_stage_profile_report_md(
    alpha_econ=alpha_econ, alpha_econ_details=alpha_econ_details,
    beta_econ=beta_econ,   beta_econ_details=beta_econ_details,
    econ_features_all=econ_num+econ_cat,
    alpha_socio=alpha_socio, alpha_socio_details=alpha_socio_details,
    beta_socio=beta_socio,   beta_socio_details=beta_socio_details,
    socio_features_all=socio_num+socio_cat,
    suff_threshold=0.7, round_beta=4, out_path=REPORT_PATH
)

print("Feature-Gruppen:")
print("Ökonomisch – numerisch:", econ_num)
print("Ökonomisch – kategorisch:", econ_cat)
print("Soziodemographisch – numerisch:", socio_num)
print("Soziodemographisch – kategorisch:", socio_cat)
print(f"💾 Profilbericht (mit Details) gespeichert unter: {REPORT_PATH}")
if WRITE_ALPHA_DETAILS_CSV:
    print(f"💾 Alpha-Details CSV: {ALPHA_DETAILS_CSV}")
if WRITE_BETA_DETAILS_CSV:
    print(f"💾 Beta-Details CSV:  {BETA_DETAILS_CSV}")

Feature-Gruppen:
Ökonomisch – numerisch: ['Kreditbetrag', 'Dauer_in_Monaten', 'Ratenhöhe', 'Anzahl_bestehender_Kredite']
Ökonomisch – kategorisch: ['Kreditgeschichte', 'Kreditverwendungszweck', 'Sparkonto_Wertpapiere', 'Vermögen', 'Status_des_Girokontos', 'Beschäftigt_seit', 'Andere_Ratenverpflichtungen', 'Weitere_Bürgen_Schuldner']
Soziodemographisch – numerisch: ['Alter', 'Wohnsitzdauer', 'Unterhaltspflichtige_Personen']
Soziodemographisch – kategorisch: ['Familienstand_Geschlecht', 'Wohnsituation', 'Telefon', 'Ausländischer_Arbeiter', 'Beruf']
💾 Profilbericht (mit Details) gespeichert unter: C:/Users/JonasNiehus/Documents/Masterarbeit/Evaluation/Masterarbeit_Guetekriterien-sichere-und-interpetierbare-Hochrisiko-KI-Systeme/results/profilbericht_alpha_beta_twostage.md
💾 Alpha-Details CSV: c:/Users/JonasNiehus/Documents/Masterarbeit/Evaluation/Masterarbeit_Guetekriterien-sichere-und-interpetierbare-Hochrisiko-KI-Systeme/results/alpha_details.csv
💾 Beta-Details CSV:  c:/Users/JonasNiehu

In [None]:
# =========================
# Schön formatierter Markdown-Report aus Evaluationsergebnissen
# =========================

import os
import numpy as np
import pandas as pd

# === Pfade anpassen (falls nötig) ===
BASE = r"c:/Users/JonasNiehus/Documents/Masterarbeit/Evaluation/Guetekriterien-sichere-und-interpetierbare-Hochrisiko-KI-Systeme/results"
ALPHA_DETAILS_CSV = os.path.join(BASE, "alpha_details.csv")
BETA_DETAILS_CSV  = os.path.join(BASE, "beta_details.csv")
OUT_MD_PATH       = os.path.join(BASE, "profilbericht_alpha_beta_DETAILS_from_results.md")

SUFF_THRESHOLD = 0.70   # β-Schwelle
MAX_TEXT = 70           # max. Zeichen pro Zelle für schönere Zeilen

# --- Laden ---
alpha_df = pd.read_csv(ALPHA_DETAILS_CSV)
beta_df  = pd.read_csv(BETA_DETAILS_CSV)

# --- Hilfsformatierer ---
def _fmt_prob(p):
    if p is None or (isinstance(p, float) and not np.isfinite(p)): return "—"
    try: return f"{float(p):.3f}"
    except: return str(p)

def _trim(s, L=MAX_TEXT):
    s = "—" if (s is None or (isinstance(s, float) and pd.isna(s))) else str(s)
    return s if len(s) <= L else s[:L-1] + "…"

def _fmt_val(v):
    if v is None: return "—"
    try:
        if isinstance(v, (bool, np.bool_)): return str(v)
        fv = float(v)
        return f"{fv:.4g}" if np.isfinite(fv) else str(v)
    except:
        return _trim(v)

def _alpha_pretty_row(row):
    if int(row.get("is_necessary", 0)) != 1:
        return f"orig={_fmt_val(row.get('orig_value'))} | keine CF-Änderung"
    return (
        f"orig={_fmt_val(row.get('orig_value'))} → cf={_fmt_val(row.get('cf_value'))} "
        f"(Δ={_fmt_val(row.get('delta_abs'))}, "
        f"p:{_fmt_prob(row.get('orig_proba'))}→{_fmt_prob(row.get('cf_proba'))}, "
        f"y:{row.get('orig_label','—')}→{row.get('cf_label','—')})"
    )

def _beta_pretty_row(row):
    return (
        f"a={_fmt_val(row.get('a'))} | "
        f"β={_fmt_val(row.get('beta'))} | "
        f"same/diff={row.get('n_same','—')}/{row.get('n_diff','—')} | "
        f"y*={row.get('y_star','—')} (p={_fmt_prob(row.get('proba_star'))})"
    )

# --- Kürzen & Mergen ---
a_keep = ["profile_index","feature","gruppe","is_necessary","orig_value","cf_value","delta_abs","orig_label","orig_proba","cf_label","cf_proba"]
b_keep = ["profile_index","feature","gruppe","a","beta","n_same","n_diff","n_samples","y_star","proba_star"]

A = alpha_df[[c for c in a_keep if c in alpha_df.columns]].copy()
B = beta_df [[c for c in b_keep if c in beta_df.columns]].copy()

merge_on = [c for c in ["profile_index","feature","gruppe"] if c in A.columns and c in B.columns]
M = pd.merge(A, B, on=merge_on, how="inner").sort_values(["profile_index","gruppe","feature"]).reset_index(drop=True)

# Schöne Spalten bauen
M["α (orig→cf)"] = M.apply(_alpha_pretty_row, axis=1)
M["β (fixierter Wert)"] = M.apply(_beta_pretty_row, axis=1)
M["alpha_notwendig"] = M["is_necessary"].astype(int)
M[f"hinreichend (β≥{SUFF_THRESHOLD:.2f})"] = (M["beta"].astype(float) >= SUFF_THRESHOLD).astype(int)

show_cols = ["feature","gruppe","alpha_notwendig","α (orig→cf)","β (fixierter Wert)", f"hinreichend (β≥{SUFF_THRESHOLD:.2f})"]

# --- Markdown mit aufklappbaren Profil-Abschnitten ---
lines = []
lines.append("# Profilbericht – α/β Detailtabellen (aus Evaluationsergebnissen)\n")

for pid, g in M.groupby("profile_index", sort=True):
    lines.append(f"\n<details>\n<summary><b>Profil {pid}</b></summary>\n")
    g = g.sort_values(["gruppe","feature"]).copy()
    g["feature"] = g["feature"].map(_trim)
    g["gruppe"]  = g["gruppe"].map(_trim)
    g["α (orig→cf)"] = g["α (orig→cf)"].map(lambda s: _trim(s, 120))
    g["β (fixierter Wert)"] = g["β (fixierter Wert)"].map(lambda s: _trim(s, 120))
    lines.append("\n\n" + g[show_cols].to_markdown(index=False) + "\n")
    lines.append("</details>\n\n---")

os.makedirs(os.path.dirname(OUT_MD_PATH), exist_ok=True)
with open(OUT_MD_PATH, "w", encoding="utf-8") as f:
    f.write("\n".join(lines))

print(f"✅ Markdown-Report gespeichert unter:\n{OUT_MD_PATH}")


✅ Markdown-Report gespeichert unter:
c:/Users/JonasNiehus/Documents/Masterarbeit/Evaluation/Ergebnisse\profilbericht_alpha_beta_DETAILS_from_results.md


In [13]:
# =========================
# Hübscher HTML-Report mit Pandas Styler
# =========================

import os
import numpy as np
import pandas as pd

BASE = r"c:/Users/JonasNiehus/Documents/Masterarbeit/Evaluation/Ergebnisse"
ALPHA_DETAILS_CSV = os.path.join(BASE, "alpha_details.csv")
BETA_DETAILS_CSV  = os.path.join(BASE, "beta_details.csv")
OUT_HTML_PATH     = os.path.join(BASE, "profilbericht_alpha_beta_DETAILS_from_results.html")

SUFF_THRESHOLD = 0.70
MAX_TEXT = 120

# --- Laden ---
alpha_df = pd.read_csv(ALPHA_DETAILS_CSV)
beta_df  = pd.read_csv(BETA_DETAILS_CSV)

def _fmt_prob(p):
    if p is None or (isinstance(p, float) and not np.isfinite(p)): return "—"
    try: return f"{float(p):.3f}"
    except: return str(p)

def _trim(s, L=MAX_TEXT):
    s = "—" if (s is None or (isinstance(s, float) and pd.isna(s))) else str(s)
    return s if len(s) <= L else s[:L-1] + "…"

def _fmt_val(v):
    if v is None: return "—"
    try:
        if isinstance(v, (bool, np.bool_)): return str(v)
        fv = float(v)
        return f"{fv:.4g}" if np.isfinite(fv) else str(v)
    except:
        return _trim(v)

def _alpha_pretty_row(row):
    if int(row.get("is_necessary", 0)) != 1:
        return f"orig={_fmt_val(row.get('orig_value'))} | keine CF-Änderung"
    return (
        f"orig={_fmt_val(row.get('orig_value'))} → cf={_fmt_val(row.get('cf_value'))} "
        f"(Δ={_fmt_val(row.get('delta_abs'))}, "
        f"p:{_fmt_prob(row.get('orig_proba'))}→{_fmt_prob(row.get('cf_proba'))}, "
        f"y:{row.get('orig_label','—')}→{row.get('cf_label','—')})"
    )

def _beta_pretty_row(row):
    return (
        f"a={_fmt_val(row.get('a'))} | "
        f"β={_fmt_val(row.get('beta'))} | "
        f"same/diff={row.get('n_same','—')}/{row.get('n_diff','—')} | "
        f"y*={row.get('y_star','—')} (p={_fmt_prob(row.get('proba_star'))})"
    )

# Mergen
A = alpha_df[["profile_index","feature","gruppe","is_necessary","orig_value","cf_value","delta_abs","orig_label","orig_proba","cf_label","cf_proba"]].copy()
B = beta_df [["profile_index","feature","gruppe","a","beta","n_same","n_diff","n_samples","y_star","proba_star"]].copy()
M = pd.merge(A, B, on=["profile_index","feature","gruppe"], how="inner").sort_values(["profile_index","gruppe","feature"]).reset_index(drop=True)

M["α (orig→cf)"] = M.apply(_alpha_pretty_row, axis=1)
M["β (fixierter Wert)"] = M.apply(_beta_pretty_row, axis=1)
M["alpha_notwendig"] = M["is_necessary"].astype(int)
M[f"hinreichend (β≥{SUFF_THRESHOLD:.2f})"] = (M["beta"].astype(float) >= SUFF_THRESHOLD).astype(int)

show_cols = ["profile_index","feature","gruppe","alpha_notwendig","α (orig→cf)","β (fixierter Wert)", f"hinreichend (β≥{SUFF_THRESHOLD:.2f})"]
T = M[show_cols].copy()

# Styling (Zeilenumbruch, Spaltenbreiten, sticky header)
sty = (
    T.style
    .set_table_styles([
        {"selector":"th", "props":[("position","sticky"),("top","0"),("background","#f7f7f7"),("z-index","1")]},
        {"selector":"table", "props":[("border-collapse","collapse"),("font-size","14px"),("width","100%")]},
    ])
    .set_properties(subset=["feature","gruppe"], **{"white-space":"normal","word-wrap":"break-word","max-width":"220px"})
    .set_properties(subset=["α (orig→cf)","β (fixierter Wert)"], **{"font-family":"ui-monospace, SFMono-Regular, Menlo, Consolas, 'Liberation Mono', monospace",
                                                                     "white-space":"normal","word-wrap":"break-word","max-width":"520px"})
    .hide(axis="index")
)

html = """<h1>Profilbericht – α/β Detailtabellen (aus Evaluationsergebnissen)</h1>
<p>β-Schwelle (hinreichend): <b>{:.2f}</b></p>
""".format(SUFF_THRESHOLD) + sty.to_html()

os.makedirs(os.path.dirname(OUT_HTML_PATH), exist_ok=True)
with open(OUT_HTML_PATH, "w", encoding="utf-8") as f:
    f.write(html)

print(f"✅ HTML-Report gespeichert unter:\n{OUT_HTML_PATH}")


KeyError: "['orig_value', 'orig_label', 'orig_proba'] not in index"

In [3]:
# =========================
# Auswertung_Profiles.ipynb – liest nur Evaluationsergebnisse (CSV/MD)
# =========================

import os
import numpy as np
import pandas as pd

# --- Pfade zu den Evaluationsergebnissen (aus dem Evaluation-Notebook) ---
# > bei Bedarf hier anpassen
BASE = r"c:/Users/JonasNiehus/Documents/Masterarbeit/Evaluation/Ergebnisse"

ALPHA_DETAILS_CSV = os.path.join(BASE, "c:/Users/JonasNiehus/Documents/Masterarbeit/Evaluation/Ergebnisse/alpha_details.csv")
BETA_DETAILS_CSV  = os.path.join(BASE, "c:/Users/JonasNiehus/Documents/Masterarbeit/Evaluation/Ergebnisse/beta_details.csv")
REPORT_PATH       = os.path.join(BASE, "c:/Users/JonasNiehus/Documents/Masterarbeit/Evaluation/Ergebnisse/profilbericht_alpha_beta_twostage.md")  # optional, wird hier nicht zwingend benötigt

# --- Parameter ---
SUFF_THRESHOLD = 0.7   # β-Schwelle für "hinreichend"
ROUND_BETA     = 4     # Rundung β-Werte
EXPORT_MD      = True  # Falls True: exportiert ein Markdown-Report (s.u.)
OUT_MD_PATH    = os.path.join(BASE, "profilbericht_alpha_beta_DETAILS_from_results.md")

print("ALPHA_DETAILS_CSV:", ALPHA_DETAILS_CSV)
print("BETA_DETAILS_CSV :", BETA_DETAILS_CSV)
print("REPORT_PATH      :", REPORT_PATH)


ALPHA_DETAILS_CSV: c:/Users/JonasNiehus/Documents/Masterarbeit/Evaluation/Ergebnisse/alpha_details.csv
BETA_DETAILS_CSV : c:/Users/JonasNiehus/Documents/Masterarbeit/Evaluation/Ergebnisse/beta_details.csv
REPORT_PATH      : c:/Users/JonasNiehus/Documents/Masterarbeit/Evaluation/Ergebnisse/profilbericht_alpha_beta_twostage.md


In [4]:
# --- Robust laden + Validierungen ---
if not os.path.exists(ALPHA_DETAILS_CSV):
    raise FileNotFoundError(f"Alpha-Details CSV nicht gefunden: {ALPHA_DETAILS_CSV}")
if not os.path.exists(BETA_DETAILS_CSV):
    raise FileNotFoundError(f"Beta-Details CSV nicht gefunden: {BETA_DETAILS_CSV}")

alpha_details_df = pd.read_csv(ALPHA_DETAILS_CSV)
beta_details_df  = pd.read_csv(BETA_DETAILS_CSV)

# Sanity-Checks/Angleichungen
for col in ["profile_index", "feature", "gruppe"]:
    if col in alpha_details_df.columns:
        alpha_details_df[col] = alpha_details_df[col]
    if col in beta_details_df.columns:
        beta_details_df[col] = beta_details_df[col]

# Beta-Postprocessing (Prozente etc. – falls noch nicht enthalten)
def postprocess_beta_details(beta_details_df: pd.DataFrame) -> pd.DataFrame:
    df = beta_details_df.copy()
    if "beta" in df.columns:
        df["beta"] = df["beta"].astype(float)
        df["beta_pct"] = (df["beta"] * 100).round(2)
    if {"n_same","n_diff","n_samples"}.issubset(df.columns):
        df["same_pct"] = (df["n_same"] / df["n_samples"] * 100).round(2)
        df["diff_pct"] = (df["n_diff"] / df["n_samples"] * 100).round(2)
    sort_cols = [c for c in ["profile_index","gruppe","feature"] if c in df.columns]
    if sort_cols:
        df = df.sort_values(sort_cols).reset_index(drop=True)
    return df

beta_details_df = postprocess_beta_details(beta_details_df)

print("alpha_details_df:", alpha_details_df.shape)
print("beta_details_df :", beta_details_df.shape)
alpha_details_df.head(2), beta_details_df.head(2)


alpha_details_df: (200, 11)
beta_details_df : (200, 13)


(   is_necessary  original_label  original_proba original_value cf_value  \
 0         False               1        0.551289         6458.0      NaN   
 1          True               1        0.551289           18.0     48.0   
 
    delta_abs  cf_label  cf_proba  profile_index           feature       gruppe  
 0        NaN       NaN       NaN              0      Kreditbetrag  oekonomisch  
 1       30.0       0.0  0.493949              0  Dauer_in_Monaten  oekonomisch  ,
                        feature     a   beta  y_star  proba_star  n_samples  \
 0  Andere_Ratenverpflichtungen  Bank  0.763       1    0.551289       2000   
 1   Anzahl_bestehender_Kredite   2.0  0.764       1    0.551289       2000   
 
    n_same  n_diff  profile_index       gruppe  beta_pct  same_pct  diff_pct  
 0    1526     474              0  oekonomisch      76.3      76.3      23.7  
 1    1528     472              0  oekonomisch      76.4      76.4      23.6  )

In [5]:
def _fmt_prob(p):
    if p is None or (isinstance(p, float) and not np.isfinite(p)): 
        return "—"
    try:
        return f"{float(p):.3f}"
    except Exception:
        return str(p)

def _fmt_val(v, maxlen=60):
    if v is None: 
        return "—"
    try:
        if isinstance(v, (bool, np.bool_)): 
            return str(v)
        fv = float(v)
        return f"{fv:.4g}" if np.isfinite(fv) else str(v)
    except Exception:
        s = str(v)
        return (s[:maxlen-1] + "…") if len(s) > maxlen else s


In [6]:
def _alpha_pretty_row(row: pd.Series) -> str:
    """orig→cf (Δ, p:orig→cf, y:orig→cf) oder 'keine CF-Änderung'"""
    is_nec = int(row.get("is_necessary", 0)) == 1
    if not is_nec:
        return f"orig={_fmt_val(row.get('orig_value'))} | keine CF-Änderung"
    return (
        f"orig={_fmt_val(row.get('orig_value'))} → cf={_fmt_val(row.get('cf_value'))} "
        f"(Δ={_fmt_val(row.get('delta_abs'))}, "
        f"p:{_fmt_prob(row.get('orig_proba'))}→{_fmt_prob(row.get('cf_proba'))}, "
        f"y:{row.get('orig_label','—')}→{row.get('cf_label','—')})"
    )

def _beta_pretty_row(row: pd.Series) -> str:
    """a, β, same/diff, y*, p*"""
    return (
        f"a={_fmt_val(row.get('a'))} | "
        f"β={_fmt_val(row.get('beta'))} | "
        f"same/diff={row.get('n_same','—')}/{row.get('n_diff','—')} | "
        f"y*={row.get('y_star','—')} (p={_fmt_prob(row.get('proba_star'))})"
    )


In [7]:
# Relevante Spalten aus α/β-Details auswählen
a_keep = ["profile_index","feature","gruppe",
          "is_necessary","orig_value","cf_value","delta_abs","orig_label","orig_proba","cf_label","cf_proba"]
b_keep = ["profile_index","feature","gruppe",
          "a","beta","n_same","n_diff","n_samples","y_star","proba_star"]

a_df = alpha_details_df[[c for c in a_keep if c in alpha_details_df.columns]].copy()
b_df = beta_details_df [[c for c in b_keep if c in beta_details_df.columns]].copy()

merge_cols = [c for c in ["profile_index","feature","gruppe"] if c in a_df.columns and c in b_df.columns]
merged = pd.merge(a_df, b_df, on=merge_cols, how="inner")

# Sortierung & hübsche Spalten bauen
sort_cols = [c for c in ["profile_index","gruppe","feature"] if c in merged.columns]
merged = merged.sort_values(sort_cols).reset_index(drop=True)

merged["α (orig→cf)"] = merged.apply(_alpha_pretty_row, axis=1)
merged["β (fixierter Wert)"] = merged.apply(_beta_pretty_row, axis=1)
merged["alpha_notwendig"] = merged["is_necessary"].astype(int)
merged[f"hinreichend (β≥{SUFF_THRESHOLD:.2f})"] = (merged["beta"].astype(float) >= SUFF_THRESHOLD).astype(int)

# Finale Ansichtsspalten
show_cols = ["feature","gruppe","alpha_notwendig","α (orig→cf)","β (fixierter Wert)", f"hinreichend (β≥{SUFF_THRESHOLD:.2f})"]
show_cols = [c for c in show_cols if c in merged.columns]

print(f"Zusammengeführte Detailtabelle: {merged.shape}")
merged.head(3)[show_cols]


Zusammengeführte Detailtabelle: (200, 19)


Unnamed: 0,feature,gruppe,alpha_notwendig,α (orig→cf),β (fixierter Wert),hinreichend (β≥0.70)
0,Andere_Ratenverpflichtungen,oekonomisch,1,"orig=— → cf=A141 (Δ=nan, p:—→0.494, y:—→0.0)",a=Bank | β=0.763 | same/diff=1526/474 | y*=1 (...,1
1,Anzahl_bestehender_Kredite,oekonomisch,0,orig=— | keine CF-Änderung,a=2 | β=0.764 | same/diff=1528/472 | y*=1 (p=0...,1
2,Beschäftigt_seit,oekonomisch,0,orig=— | keine CF-Änderung,a=.. >= 7 Jahre | β=0.7605 | same/diff=1521/47...,1


In [8]:
# Übersicht je Profil: Anzahl notwendiger und hinreichender Features pro Gruppe
overview_rows = []
for pid, gdf in merged.groupby("profile_index", sort=True):
    row = {"profile_index": pid}
    # ökonomisch
    econ = gdf[gdf["gruppe"] == "oekonomisch"]
    row["econ_alpha_cnt"] = int(econ["alpha_notwendig"].sum()) if not econ.empty else 0
    row["econ_hinr_cnt"]  = int(econ[f"hinreichend (β≥{SUFF_THRESHOLD:.2f})"].sum()) if not econ.empty else 0
    # sozio
    sozi = gdf[gdf["gruppe"] == "sozio"]
    row["socio_alpha_cnt"] = int(sozi["alpha_notwendig"].sum()) if not sozi.empty else 0
    row["socio_hinr_cnt"]  = int(sozi[f"hinreichend (β≥{SUFF_THRESHOLD:.2f})"].sum()) if not sozi.empty else 0
    overview_rows.append(row)

overview_df = pd.DataFrame(overview_rows).set_index("profile_index").sort_index()
display(overview_df.style.format(precision=0))


Unnamed: 0_level_0,econ_alpha_cnt,econ_hinr_cnt,socio_alpha_cnt,socio_hinr_cnt
profile_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,7,12,2,8
1,1,0,0,0
2,1,0,0,0
3,11,12,6,8
4,6,0,5,0
5,11,0,7,0
6,1,0,0,0
7,0,12,0,8
8,1,12,0,8
9,2,11,3,8


In [9]:
# Anzeige: Für jedes Profil eine kompakte Tabelle (Features, α/β-Details)
for pid, gdf in merged.groupby("profile_index", sort=True):
    print(f"\n=== Profil {pid} ===")
    # optional: zuerst ökonomisch, dann sozio
    gdf = gdf.sort_values(["gruppe","feature"])
    display(gdf[show_cols])



=== Profil 0 ===


Unnamed: 0,feature,gruppe,alpha_notwendig,α (orig→cf),β (fixierter Wert),hinreichend (β≥0.70)
0,Andere_Ratenverpflichtungen,oekonomisch,1,"orig=— → cf=A141 (Δ=nan, p:—→0.494, y:—→0.0)",a=Bank | β=0.763 | same/diff=1526/474 | y*=1 (...,1
1,Anzahl_bestehender_Kredite,oekonomisch,0,orig=— | keine CF-Änderung,a=2 | β=0.764 | same/diff=1528/472 | y*=1 (p=0...,1
2,Beschäftigt_seit,oekonomisch,0,orig=— | keine CF-Änderung,a=.. >= 7 Jahre | β=0.7605 | same/diff=1521/47...,1
3,Dauer_in_Monaten,oekonomisch,1,"orig=— → cf=48 (Δ=30, p:—→0.494, y:—→0.0)",a=18 | β=0.8355 | same/diff=1671/329 | y*=1 (p...,1
4,Kreditbetrag,oekonomisch,0,orig=— | keine CF-Änderung,a=6458 | β=0.7605 | same/diff=1521/479 | y*=1 ...,1
5,Kreditgeschichte,oekonomisch,1,"orig=— → cf=A30 (Δ=nan, p:—→0.492, y:—→0.0)",a=alle Kredite bei dieser Bank wurden ordnungs...,1
6,Kreditverwendungszweck,oekonomisch,1,"orig=— → cf=A46 (Δ=nan, p:—→0.475, y:—→0.0)",a=Auto (neu) | β=0.7435 | same/diff=1487/513 |...,1
7,Ratenhöhe,oekonomisch,1,"orig=— → cf=3.25 (Δ=1.25, p:—→0.491, y:—→0.0)",a=2 | β=0.847 | same/diff=1694/306 | y*=1 (p=0...,1
8,Sparkonto_Wertpapiere,oekonomisch,1,"orig=— → cf=A61 (Δ=nan, p:—→0.479, y:—→0.0)",a=... < 100 DM | β=0.8195 | same/diff=1639/361...,1
9,Status_des_Girokontos,oekonomisch,1,"orig=— → cf=A11 (Δ=nan, p:—→0.474, y:—→0.0)",a=kein Girokonto | β=0.823 | same/diff=1646/35...,1



=== Profil 1 ===


Unnamed: 0,feature,gruppe,alpha_notwendig,α (orig→cf),β (fixierter Wert),hinreichend (β≥0.70)
20,Andere_Ratenverpflichtungen,oekonomisch,0,orig=— | keine CF-Änderung,a=Keine | β=0.2425 | same/diff=485/1515 | y*=0...,0
21,Anzahl_bestehender_Kredite,oekonomisch,0,orig=— | keine CF-Änderung,a=1 | β=0.22 | same/diff=440/1560 | y*=0 (p=0....,0
22,Beschäftigt_seit,oekonomisch,0,orig=— | keine CF-Änderung,a=1 <= ... < 4 Jahre | β=0.2515 | same/diff=50...,0
23,Dauer_in_Monaten,oekonomisch,1,"orig=— → cf=32.25 (Δ=39.75, p:—→0.503, y:—→1.0)",a=72 | β=0.68 | same/diff=1360/640 | y*=0 (p=0...,0
24,Kreditbetrag,oekonomisch,0,orig=— | keine CF-Änderung,a=5595 | β=0.244 | same/diff=488/1512 | y*=0 (...,0
25,Kreditgeschichte,oekonomisch,0,orig=— | keine CF-Änderung,a=bestehende Kredite wurden bisher ordnungsgem...,0
26,Kreditverwendungszweck,oekonomisch,0,orig=— | keine CF-Änderung,a=Radio/TV | β=0.2545 | same/diff=509/1491 | y...,0
27,Ratenhöhe,oekonomisch,0,orig=— | keine CF-Änderung,a=2 | β=0.157 | same/diff=314/1686 | y*=0 (p=0...,0
28,Sparkonto_Wertpapiere,oekonomisch,0,orig=— | keine CF-Änderung,a=100 <= ... < 500 DM | β=0.192 | same/diff=38...,0
29,Status_des_Girokontos,oekonomisch,0,orig=— | keine CF-Änderung,a=0 <= ... < 200 DM | β=0.188 | same/diff=376/...,0



=== Profil 2 ===


Unnamed: 0,feature,gruppe,alpha_notwendig,α (orig→cf),β (fixierter Wert),hinreichend (β≥0.70)
40,Andere_Ratenverpflichtungen,oekonomisch,0,orig=— | keine CF-Änderung,a=Bank | β=0.247 | same/diff=494/1506 | y*=0 (...,0
41,Anzahl_bestehender_Kredite,oekonomisch,1,"orig=— → cf=1.25 (Δ=2.75, p:—→0.501, y:—→1.0)",a=4 | β=0.3605 | same/diff=721/1279 | y*=0 (p=...,0
42,Beschäftigt_seit,oekonomisch,0,orig=— | keine CF-Änderung,a=... < 1 Jahr | β=0.2535 | same/diff=507/1493...,0
43,Dauer_in_Monaten,oekonomisch,0,orig=— | keine CF-Änderung,a=12 | β=0.143 | same/diff=286/1714 | y*=0 (p=...,0
44,Kreditbetrag,oekonomisch,0,orig=— | keine CF-Änderung,a=951 | β=0.1875 | same/diff=375/1625 | y*=0 (...,0
45,Kreditgeschichte,oekonomisch,0,orig=— | keine CF-Änderung,a=bestehende Kredite wurden bisher ordnungsgem...,0
46,Kreditverwendungszweck,oekonomisch,0,orig=— | keine CF-Änderung,a=Möbel/Ausstattung | β=0.257 | same/diff=514/...,0
47,Ratenhöhe,oekonomisch,0,orig=— | keine CF-Änderung,a=4 | β=0.2705 | same/diff=541/1459 | y*=0 (p=...,0
48,Sparkonto_Wertpapiere,oekonomisch,0,orig=— | keine CF-Änderung,a=100 <= ... < 500 DM | β=0.1955 | same/diff=3...,0
49,Status_des_Girokontos,oekonomisch,0,orig=— | keine CF-Änderung,a=0 <= ... < 200 DM | β=0.1915 | same/diff=383...,0



=== Profil 3 ===


Unnamed: 0,feature,gruppe,alpha_notwendig,α (orig→cf),β (fixierter Wert),hinreichend (β≥0.70)
60,Andere_Ratenverpflichtungen,oekonomisch,1,"orig=— → cf=A141 (Δ=nan, p:—→0.487, y:—→0.0)",a=Keine | β=0.7395 | same/diff=1479/521 | y*=1...,1
61,Anzahl_bestehender_Kredite,oekonomisch,1,"orig=— → cf=1.125 (Δ=0.125, p:—→0.499, y:—→0.0)",a=1 | β=0.775 | same/diff=1550/450 | y*=1 (p=0...,1
62,Beschäftigt_seit,oekonomisch,1,"orig=— → cf=A71 (Δ=nan, p:—→0.427, y:—→0.0)",a=... < 1 Jahr | β=0.738 | same/diff=1476/524 ...,1
63,Dauer_in_Monaten,oekonomisch,1,"orig=— → cf=16.5 (Δ=4.5, p:—→0.473, y:—→0.0)",a=12 | β=0.8555 | same/diff=1711/289 | y*=1 (p...,1
64,Kreditbetrag,oekonomisch,1,"orig=— → cf=1766 (Δ=1157, p:—→0.499, y:—→0.0)",a=609 | β=0.8035 | same/diff=1607/393 | y*=1 (...,1
65,Kreditgeschichte,oekonomisch,1,"orig=— → cf=A30 (Δ=nan, p:—→0.366, y:—→0.0)",a=alle Kredite bei dieser Bank wurden ordnungs...,1
66,Kreditverwendungszweck,oekonomisch,1,"orig=— → cf=A40 (Δ=nan, p:—→0.487, y:—→0.0)",a=Geschäft | β=0.7345 | same/diff=1469/531 | y...,1
67,Ratenhöhe,oekonomisch,0,orig=— | keine CF-Änderung,a=4 | β=0.7105 | same/diff=1421/579 | y*=1 (p=...,1
68,Sparkonto_Wertpapiere,oekonomisch,1,"orig=— → cf=A61 (Δ=nan, p:—→0.439, y:—→0.0)",a=... < 100 DM | β=0.8085 | same/diff=1617/383...,1
69,Status_des_Girokontos,oekonomisch,1,"orig=— → cf=A11 (Δ=nan, p:—→0.397, y:—→0.0)",a=... >= 200 DM / Gehaltspfändung mindestens 1...,1



=== Profil 4 ===


Unnamed: 0,feature,gruppe,alpha_notwendig,α (orig→cf),β (fixierter Wert),hinreichend (β≥0.70)
80,Andere_Ratenverpflichtungen,oekonomisch,0,orig=— | keine CF-Änderung,a=Keine | β=0.2455 | same/diff=491/1509 | y*=0...,0
81,Anzahl_bestehender_Kredite,oekonomisch,0,orig=— | keine CF-Änderung,a=1 | β=0.216 | same/diff=432/1568 | y*=0 (p=0...,0
82,Beschäftigt_seit,oekonomisch,1,"orig=— → cf=A75 (Δ=nan, p:—→0.501, y:—→1.0)",a=... < 1 Jahr | β=0.2525 | same/diff=505/1495...,0
83,Dauer_in_Monaten,oekonomisch,1,"orig=— → cf=48 (Δ=6, p:—→0.510, y:—→1.0)",a=54 | β=0.501 | same/diff=1002/998 | y*=0 (p=...,0
84,Kreditbetrag,oekonomisch,0,orig=— | keine CF-Änderung,a=1.594e+04 | β=0.3855 | same/diff=771/1229 | ...,0
85,Kreditgeschichte,oekonomisch,1,"orig=— → cf=A33 (Δ=nan, p:—→0.524, y:—→1.0)",a=keine Kredite aufgenommen / alle Kredite wur...,0
86,Kreditverwendungszweck,oekonomisch,1,"orig=— → cf=A40 (Δ=nan, p:—→0.509, y:—→1.0)",a=Geschäft | β=0.257 | same/diff=514/1486 | y*...,0
87,Ratenhöhe,oekonomisch,0,orig=— | keine CF-Änderung,a=3 | β=0.2105 | same/diff=421/1579 | y*=0 (p=...,0
88,Sparkonto_Wertpapiere,oekonomisch,0,orig=— | keine CF-Änderung,a=... < 100 DM | β=0.193 | same/diff=386/1614 ...,0
89,Status_des_Girokontos,oekonomisch,1,"orig=— → cf=A13 (Δ=nan, p:—→0.524, y:—→1.0)",a=0 <= ... < 200 DM | β=0.196 | same/diff=392/...,0



=== Profil 5 ===


Unnamed: 0,feature,gruppe,alpha_notwendig,α (orig→cf),β (fixierter Wert),hinreichend (β≥0.70)
100,Andere_Ratenverpflichtungen,oekonomisch,1,"orig=— → cf=A143 (Δ=nan, p:—→0.536, y:—→1.0)",a=Bank | β=0.2465 | same/diff=493/1507 | y*=0 ...,0
101,Anzahl_bestehender_Kredite,oekonomisch,1,"orig=— → cf=1.875 (Δ=0.125, p:—→0.505, y:—→1.0)",a=2 | β=0.2505 | same/diff=501/1499 | y*=0 (p=...,0
102,Beschäftigt_seit,oekonomisch,1,"orig=— → cf=A72 (Δ=nan, p:—→0.518, y:—→1.0)",a=.. >= 7 Jahre | β=0.2535 | same/diff=507/149...,0
103,Dauer_in_Monaten,oekonomisch,1,"orig=— → cf=21.75 (Δ=2.25, p:—→0.512, y:—→1.0)",a=24 | β=0.209 | same/diff=418/1582 | y*=0 (p=...,0
104,Kreditbetrag,oekonomisch,0,orig=— | keine CF-Änderung,a=2223 | β=0.2 | same/diff=400/1600 | y*=0 (p=...,0
105,Kreditgeschichte,oekonomisch,1,"orig=— → cf=A32 (Δ=nan, p:—→0.517, y:—→1.0)",a=kritisches Konto / andere Kredite bestehen (...,0
106,Kreditverwendungszweck,oekonomisch,1,"orig=— → cf=A41 (Δ=nan, p:—→0.657, y:—→1.0)",a=Radio/TV | β=0.273 | same/diff=546/1454 | y*...,0
107,Ratenhöhe,oekonomisch,1,"orig=— → cf=3.625 (Δ=0.375, p:—→0.521, y:—→1.0)",a=4 | β=0.281 | same/diff=562/1438 | y*=0 (p=0...,0
108,Sparkonto_Wertpapiere,oekonomisch,1,"orig=— → cf=A63 (Δ=nan, p:—→0.526, y:—→1.0)",a=100 <= ... < 500 DM | β=0.19 | same/diff=380...,0
109,Status_des_Girokontos,oekonomisch,1,"orig=— → cf=A12 (Δ=nan, p:—→0.505, y:—→1.0)",a=kein Girokonto | β=0.1845 | same/diff=369/16...,0



=== Profil 6 ===


Unnamed: 0,feature,gruppe,alpha_notwendig,α (orig→cf),β (fixierter Wert),hinreichend (β≥0.70)
120,Andere_Ratenverpflichtungen,oekonomisch,0,orig=— | keine CF-Änderung,a=Keine | β=0.252 | same/diff=504/1496 | y*=0 ...,0
121,Anzahl_bestehender_Kredite,oekonomisch,0,orig=— | keine CF-Änderung,a=2 | β=0.2455 | same/diff=491/1509 | y*=0 (p=...,0
122,Beschäftigt_seit,oekonomisch,0,orig=— | keine CF-Änderung,a=4 <= ... < 7 Jahre | β=0.26 | same/diff=520/...,0
123,Dauer_in_Monaten,oekonomisch,1,"orig=— → cf=11.25 (Δ=48.75, p:—→0.503, y:—→1.0)",a=60 | β=0.5705 | same/diff=1141/859 | y*=0 (p...,0
124,Kreditbetrag,oekonomisch,0,orig=— | keine CF-Änderung,a=1.565e+04 | β=0.3815 | same/diff=763/1237 | ...,0
125,Kreditgeschichte,oekonomisch,0,orig=— | keine CF-Änderung,a=Verzögerungen bei der Rückzahlung in der Ver...,0
126,Kreditverwendungszweck,oekonomisch,0,orig=— | keine CF-Änderung,a=Radio/TV | β=0.26 | same/diff=520/1480 | y*=...,0
127,Ratenhöhe,oekonomisch,0,orig=— | keine CF-Änderung,a=2 | β=0.163 | same/diff=326/1674 | y*=0 (p=0...,0
128,Sparkonto_Wertpapiere,oekonomisch,0,orig=— | keine CF-Änderung,a=... < 100 DM | β=0.19 | same/diff=380/1620 |...,0
129,Status_des_Girokontos,oekonomisch,0,orig=— | keine CF-Änderung,a=kein Girokonto | β=0.1955 | same/diff=391/16...,0



=== Profil 7 ===


Unnamed: 0,feature,gruppe,alpha_notwendig,α (orig→cf),β (fixierter Wert),hinreichend (β≥0.70)
140,Andere_Ratenverpflichtungen,oekonomisch,0,orig=— | keine CF-Änderung,a=Keine | β=0.7675 | same/diff=1535/465 | y*=1...,1
141,Anzahl_bestehender_Kredite,oekonomisch,0,orig=— | keine CF-Änderung,a=3 | β=0.719 | same/diff=1438/562 | y*=1 (p=0...,1
142,Beschäftigt_seit,oekonomisch,0,orig=— | keine CF-Änderung,a=1 <= ... < 4 Jahre | β=0.765 | same/diff=153...,1
143,Dauer_in_Monaten,oekonomisch,0,orig=— | keine CF-Änderung,a=6 | β=0.8985 | same/diff=1797/203 | y*=1 (p=...,1
144,Kreditbetrag,oekonomisch,0,orig=— | keine CF-Änderung,a=1299 | β=0.821 | same/diff=1642/358 | y*=1 (...,1
145,Kreditgeschichte,oekonomisch,0,orig=— | keine CF-Änderung,a=kritisches Konto / andere Kredite bestehen (...,1
146,Kreditverwendungszweck,oekonomisch,0,orig=— | keine CF-Änderung,a=Auto (neu) | β=0.753 | same/diff=1506/494 | ...,1
147,Ratenhöhe,oekonomisch,0,orig=— | keine CF-Änderung,a=1 | β=0.891 | same/diff=1782/218 | y*=1 (p=0...,1
148,Sparkonto_Wertpapiere,oekonomisch,0,orig=— | keine CF-Änderung,a=... < 100 DM | β=0.8255 | same/diff=1651/349...,1
149,Status_des_Girokontos,oekonomisch,0,orig=— | keine CF-Änderung,a=... >= 200 DM / Gehaltspfändung mindestens 1...,1



=== Profil 8 ===


Unnamed: 0,feature,gruppe,alpha_notwendig,α (orig→cf),β (fixierter Wert),hinreichend (β≥0.70)
160,Andere_Ratenverpflichtungen,oekonomisch,0,orig=— | keine CF-Änderung,a=Keine | β=0.759 | same/diff=1518/482 | y*=1 ...,1
161,Anzahl_bestehender_Kredite,oekonomisch,0,orig=— | keine CF-Änderung,a=1 | β=0.7875 | same/diff=1575/425 | y*=1 (p=...,1
162,Beschäftigt_seit,oekonomisch,0,orig=— | keine CF-Änderung,a=... < 1 Jahr | β=0.756 | same/diff=1512/488 ...,1
163,Dauer_in_Monaten,oekonomisch,0,orig=— | keine CF-Änderung,a=4 | β=0.903 | same/diff=1806/194 | y*=1 (p=0...,1
164,Kreditbetrag,oekonomisch,0,orig=— | keine CF-Änderung,a=601 | β=0.816 | same/diff=1632/368 | y*=1 (p...,1
165,Kreditgeschichte,oekonomisch,0,orig=— | keine CF-Änderung,a=bestehende Kredite wurden bisher ordnungsgem...,1
166,Kreditverwendungszweck,oekonomisch,0,orig=— | keine CF-Änderung,a=Möbel/Ausstattung | β=0.7525 | same/diff=150...,1
167,Ratenhöhe,oekonomisch,1,"orig=— → cf=3.625 (Δ=2.625, p:—→0.483, y:—→0.0)",a=1 | β=0.874 | same/diff=1748/252 | y*=1 (p=0...,1
168,Sparkonto_Wertpapiere,oekonomisch,0,orig=— | keine CF-Änderung,a=... < 100 DM | β=0.8205 | same/diff=1641/359...,1
169,Status_des_Girokontos,oekonomisch,0,orig=— | keine CF-Änderung,a=kein Girokonto | β=0.8175 | same/diff=1635/3...,1



=== Profil 9 ===


Unnamed: 0,feature,gruppe,alpha_notwendig,α (orig→cf),β (fixierter Wert),hinreichend (β≥0.70)
180,Andere_Ratenverpflichtungen,oekonomisch,0,orig=— | keine CF-Änderung,a=Bank | β=0.7565 | same/diff=1513/487 | y*=1 ...,1
181,Anzahl_bestehender_Kredite,oekonomisch,1,"orig=— → cf=1.625 (Δ=0.625, p:—→0.492, y:—→0.0)",a=1 | β=0.7895 | same/diff=1579/421 | y*=1 (p=...,1
182,Beschäftigt_seit,oekonomisch,0,orig=— | keine CF-Änderung,a=arbeitslos | β=0.7665 | same/diff=1533/467 |...,1
183,Dauer_in_Monaten,oekonomisch,0,orig=— | keine CF-Änderung,a=42 | β=0.6225 | same/diff=1245/755 | y*=1 (p...,0
184,Kreditbetrag,oekonomisch,0,orig=— | keine CF-Änderung,a=9283 | β=0.724 | same/diff=1448/552 | y*=1 (...,1
185,Kreditgeschichte,oekonomisch,0,orig=— | keine CF-Änderung,a=alle Kredite bei dieser Bank wurden ordnungs...,1
186,Kreditverwendungszweck,oekonomisch,0,orig=— | keine CF-Änderung,a=Auto (gebraucht) | β=0.7405 | same/diff=1481...,1
187,Ratenhöhe,oekonomisch,0,orig=— | keine CF-Änderung,a=1 | β=0.8825 | same/diff=1765/235 | y*=1 (p=...,1
188,Sparkonto_Wertpapiere,oekonomisch,1,"orig=— → cf=A63 (Δ=nan, p:—→0.497, y:—→0.0)",a=... < 100 DM | β=0.817 | same/diff=1634/366 ...,1
189,Status_des_Girokontos,oekonomisch,0,orig=— | keine CF-Änderung,a=0 <= ... < 200 DM | β=0.82 | same/diff=1640/...,1


In [10]:
def build_alpha_beta_details_report_md_from_results(
    merged: pd.DataFrame, out_path: str, suff_threshold: float = 0.7
):
    show_cols = ["feature","gruppe","alpha_notwendig","α (orig→cf)",
                 "β (fixierter Wert)", f"hinreichend (β≥{suff_threshold:.2f})"]
    show_cols = [c for c in show_cols if c in merged.columns]
    lines = []
    lines.append("# Profilbericht – α/β Detailtabellen (aus Evaluationsergebnissen)\n")
    for pid, gdf in merged.groupby("profile_index", sort=True):
        lines.append(f"\n## Profil {pid}\n")
        gdf = gdf.sort_values(["gruppe","feature"])
        table = gdf[show_cols].to_markdown(index=False)
        lines.append(table)
        lines.append("\n---")
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    with open(out_path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
    print(f"💾 Markdown-Report gespeichert unter: {out_path}")

if EXPORT_MD:
    build_alpha_beta_details_report_md_from_results(
        merged=merged, out_path=OUT_MD_PATH, suff_threshold=SUFF_THRESHOLD
    )


💾 Markdown-Report gespeichert unter: c:/Users/JonasNiehus/Documents/Masterarbeit/Evaluation/Ergebnisse\profilbericht_alpha_beta_DETAILS_from_results.md


In [11]:
# Top-N hinreichende Features (β) pro Profil
TOP_N = 5
for pid, gdf in merged.groupby("profile_index", sort=True):
    print(f"\n— Top {TOP_N} β-Features (Profil {pid}) —")
    top_beta = gdf.sort_values("beta", ascending=False).head(TOP_N)
    display(top_beta[["feature","gruppe","beta","β (fixierter Wert)"]])

# α-Features (= notwendig) pro Profil
for pid, gdf in merged.groupby("profile_index", sort=True):
    alpha_feats = gdf[gdf["alpha_notwendig"] == 1]
    if not alpha_feats.empty:
        print(f"\n— α-notwendige Features (Profil {pid}) —")
        display(alpha_feats[["feature","gruppe","α (orig→cf)"]])



— Top 5 β-Features (Profil 0) —


Unnamed: 0,feature,gruppe,beta,β (fixierter Wert)
7,Ratenhöhe,oekonomisch,0.847,a=2 | β=0.847 | same/diff=1694/306 | y*=1 (p=0...
3,Dauer_in_Monaten,oekonomisch,0.8355,a=18 | β=0.8355 | same/diff=1671/329 | y*=1 (p...
12,Alter,sozio,0.829,a=39 | β=0.829 | same/diff=1658/342 | y*=1 (p=...
9,Status_des_Girokontos,oekonomisch,0.823,a=kein Girokonto | β=0.823 | same/diff=1646/35...
8,Sparkonto_Wertpapiere,oekonomisch,0.8195,a=... < 100 DM | β=0.8195 | same/diff=1639/361...



— Top 5 β-Features (Profil 1) —


Unnamed: 0,feature,gruppe,beta,β (fixierter Wert)
23,Dauer_in_Monaten,oekonomisch,0.68,a=72 | β=0.68 | same/diff=1360/640 | y*=0 (p=0...
32,Alter,sozio,0.2885,a=24 | β=0.2885 | same/diff=577/1423 | y*=0 (p...
26,Kreditverwendungszweck,oekonomisch,0.2545,a=Radio/TV | β=0.2545 | same/diff=509/1491 | y...
31,Weitere_Bürgen_Schuldner,oekonomisch,0.253,a=keine | β=0.253 | same/diff=506/1494 | y*=0 ...
22,Beschäftigt_seit,oekonomisch,0.2515,a=1 <= ... < 4 Jahre | β=0.2515 | same/diff=50...



— Top 5 β-Features (Profil 2) —


Unnamed: 0,feature,gruppe,beta,β (fixierter Wert)
41,Anzahl_bestehender_Kredite,oekonomisch,0.3605,a=4 | β=0.3605 | same/diff=721/1279 | y*=0 (p=...
47,Ratenhöhe,oekonomisch,0.2705,a=4 | β=0.2705 | same/diff=541/1459 | y*=0 (p=...
52,Alter,sozio,0.2685,a=27 | β=0.2685 | same/diff=537/1463 | y*=0 (p...
46,Kreditverwendungszweck,oekonomisch,0.257,a=Möbel/Ausstattung | β=0.257 | same/diff=514/...
42,Beschäftigt_seit,oekonomisch,0.2535,a=... < 1 Jahr | β=0.2535 | same/diff=507/1493...



— Top 5 β-Features (Profil 3) —


Unnamed: 0,feature,gruppe,beta,β (fixierter Wert)
63,Dauer_in_Monaten,oekonomisch,0.8555,a=12 | β=0.8555 | same/diff=1711/289 | y*=1 (p...
69,Status_des_Girokontos,oekonomisch,0.8205,a=... >= 200 DM / Gehaltspfändung mindestens 1...
79,Wohnsitzdauer,sozio,0.809,a=1 | β=0.809 | same/diff=1618/382 | y*=1 (p=0...
68,Sparkonto_Wertpapiere,oekonomisch,0.8085,a=... < 100 DM | β=0.8085 | same/diff=1617/383...
75,Familienstand_Geschlecht,sozio,0.8035,a=weiblich : geschieden/getrennt/verheiratet |...



— Top 5 β-Features (Profil 4) —


Unnamed: 0,feature,gruppe,beta,β (fixierter Wert)
83,Dauer_in_Monaten,oekonomisch,0.501,a=54 | β=0.501 | same/diff=1002/998 | y*=0 (p=...
84,Kreditbetrag,oekonomisch,0.3855,a=1.594e+04 | β=0.3855 | same/diff=771/1229 | ...
86,Kreditverwendungszweck,oekonomisch,0.257,a=Geschäft | β=0.257 | same/diff=514/1486 | y*...
82,Beschäftigt_seit,oekonomisch,0.2525,a=... < 1 Jahr | β=0.2525 | same/diff=505/1495...
91,Weitere_Bürgen_Schuldner,oekonomisch,0.2525,a=keine | β=0.2525 | same/diff=505/1495 | y*=0...



— Top 5 β-Features (Profil 5) —


Unnamed: 0,feature,gruppe,beta,β (fixierter Wert)
107,Ratenhöhe,oekonomisch,0.281,a=4 | β=0.281 | same/diff=562/1438 | y*=0 (p=0...
106,Kreditverwendungszweck,oekonomisch,0.273,a=Radio/TV | β=0.273 | same/diff=546/1454 | y*...
105,Kreditgeschichte,oekonomisch,0.2635,a=kritisches Konto / andere Kredite bestehen (...
111,Weitere_Bürgen_Schuldner,oekonomisch,0.255,a=keine | β=0.255 | same/diff=510/1490 | y*=0 ...
102,Beschäftigt_seit,oekonomisch,0.2535,a=.. >= 7 Jahre | β=0.2535 | same/diff=507/149...



— Top 5 β-Features (Profil 6) —


Unnamed: 0,feature,gruppe,beta,β (fixierter Wert)
123,Dauer_in_Monaten,oekonomisch,0.5705,a=60 | β=0.5705 | same/diff=1141/859 | y*=0 (p...
124,Kreditbetrag,oekonomisch,0.3815,a=1.565e+04 | β=0.3815 | same/diff=763/1237 | ...
132,Alter,sozio,0.2925,a=21 | β=0.2925 | same/diff=585/1415 | y*=0 (p...
122,Beschäftigt_seit,oekonomisch,0.26,a=4 <= ... < 7 Jahre | β=0.26 | same/diff=520/...
126,Kreditverwendungszweck,oekonomisch,0.26,a=Radio/TV | β=0.26 | same/diff=520/1480 | y*=...



— Top 5 β-Features (Profil 7) —


Unnamed: 0,feature,gruppe,beta,β (fixierter Wert)
152,Alter,sozio,0.9685,a=74 | β=0.9685 | same/diff=1937/63 | y*=1 (p=...
143,Dauer_in_Monaten,oekonomisch,0.8985,a=6 | β=0.8985 | same/diff=1797/203 | y*=1 (p=...
147,Ratenhöhe,oekonomisch,0.891,a=1 | β=0.891 | same/diff=1782/218 | y*=1 (p=0...
148,Sparkonto_Wertpapiere,oekonomisch,0.8255,a=... < 100 DM | β=0.8255 | same/diff=1651/349...
149,Status_des_Girokontos,oekonomisch,0.8225,a=... >= 200 DM / Gehaltspfändung mindestens 1...



— Top 5 β-Features (Profil 8) —


Unnamed: 0,feature,gruppe,beta,β (fixierter Wert)
163,Dauer_in_Monaten,oekonomisch,0.903,a=4 | β=0.903 | same/diff=1806/194 | y*=1 (p=0...
167,Ratenhöhe,oekonomisch,0.874,a=1 | β=0.874 | same/diff=1748/252 | y*=1 (p=0...
168,Sparkonto_Wertpapiere,oekonomisch,0.8205,a=... < 100 DM | β=0.8205 | same/diff=1641/359...
169,Status_des_Girokontos,oekonomisch,0.8175,a=kein Girokonto | β=0.8175 | same/diff=1635/3...
164,Kreditbetrag,oekonomisch,0.816,a=601 | β=0.816 | same/diff=1632/368 | y*=1 (p...



— Top 5 β-Features (Profil 9) —


Unnamed: 0,feature,gruppe,beta,β (fixierter Wert)
192,Alter,sozio,0.908,a=55 | β=0.908 | same/diff=1816/184 | y*=1 (p=...
187,Ratenhöhe,oekonomisch,0.8825,a=1 | β=0.8825 | same/diff=1765/235 | y*=1 (p=...
189,Status_des_Girokontos,oekonomisch,0.82,a=0 <= ... < 200 DM | β=0.82 | same/diff=1640/...
188,Sparkonto_Wertpapiere,oekonomisch,0.817,a=... < 100 DM | β=0.817 | same/diff=1634/366 ...
195,Familienstand_Geschlecht,sozio,0.8005,a=männlich : ledig | β=0.8005 | same/diff=1601...



— α-notwendige Features (Profil 0) —


Unnamed: 0,feature,gruppe,α (orig→cf)
0,Andere_Ratenverpflichtungen,oekonomisch,"orig=— → cf=A141 (Δ=nan, p:—→0.494, y:—→0.0)"
3,Dauer_in_Monaten,oekonomisch,"orig=— → cf=48 (Δ=30, p:—→0.494, y:—→0.0)"
5,Kreditgeschichte,oekonomisch,"orig=— → cf=A30 (Δ=nan, p:—→0.492, y:—→0.0)"
6,Kreditverwendungszweck,oekonomisch,"orig=— → cf=A46 (Δ=nan, p:—→0.475, y:—→0.0)"
7,Ratenhöhe,oekonomisch,"orig=— → cf=3.25 (Δ=1.25, p:—→0.491, y:—→0.0)"
8,Sparkonto_Wertpapiere,oekonomisch,"orig=— → cf=A61 (Δ=nan, p:—→0.479, y:—→0.0)"
9,Status_des_Girokontos,oekonomisch,"orig=— → cf=A11 (Δ=nan, p:—→0.474, y:—→0.0)"
12,Alter,sozio,"orig=— → cf=26.75 (Δ=12.25, p:—→0.491, y:—→0.0)"
17,Unterhaltspflichtige_Personen,sozio,"orig=— → cf=1.125 (Δ=0.875, p:—→0.495, y:—→0.0)"



— α-notwendige Features (Profil 1) —


Unnamed: 0,feature,gruppe,α (orig→cf)
23,Dauer_in_Monaten,oekonomisch,"orig=— → cf=32.25 (Δ=39.75, p:—→0.503, y:—→1.0)"



— α-notwendige Features (Profil 2) —


Unnamed: 0,feature,gruppe,α (orig→cf)
41,Anzahl_bestehender_Kredite,oekonomisch,"orig=— → cf=1.25 (Δ=2.75, p:—→0.501, y:—→1.0)"



— α-notwendige Features (Profil 3) —


Unnamed: 0,feature,gruppe,α (orig→cf)
60,Andere_Ratenverpflichtungen,oekonomisch,"orig=— → cf=A141 (Δ=nan, p:—→0.487, y:—→0.0)"
61,Anzahl_bestehender_Kredite,oekonomisch,"orig=— → cf=1.125 (Δ=0.125, p:—→0.499, y:—→0.0)"
62,Beschäftigt_seit,oekonomisch,"orig=— → cf=A71 (Δ=nan, p:—→0.427, y:—→0.0)"
63,Dauer_in_Monaten,oekonomisch,"orig=— → cf=16.5 (Δ=4.5, p:—→0.473, y:—→0.0)"
64,Kreditbetrag,oekonomisch,"orig=— → cf=1766 (Δ=1157, p:—→0.499, y:—→0.0)"
65,Kreditgeschichte,oekonomisch,"orig=— → cf=A30 (Δ=nan, p:—→0.366, y:—→0.0)"
66,Kreditverwendungszweck,oekonomisch,"orig=— → cf=A40 (Δ=nan, p:—→0.487, y:—→0.0)"
68,Sparkonto_Wertpapiere,oekonomisch,"orig=— → cf=A61 (Δ=nan, p:—→0.439, y:—→0.0)"
69,Status_des_Girokontos,oekonomisch,"orig=— → cf=A11 (Δ=nan, p:—→0.397, y:—→0.0)"
70,Vermögen,oekonomisch,"orig=— → cf=A122 (Δ=nan, p:—→0.493, y:—→0.0)"



— α-notwendige Features (Profil 4) —


Unnamed: 0,feature,gruppe,α (orig→cf)
82,Beschäftigt_seit,oekonomisch,"orig=— → cf=A75 (Δ=nan, p:—→0.501, y:—→1.0)"
83,Dauer_in_Monaten,oekonomisch,"orig=— → cf=48 (Δ=6, p:—→0.510, y:—→1.0)"
85,Kreditgeschichte,oekonomisch,"orig=— → cf=A33 (Δ=nan, p:—→0.524, y:—→1.0)"
86,Kreditverwendungszweck,oekonomisch,"orig=— → cf=A40 (Δ=nan, p:—→0.509, y:—→1.0)"
89,Status_des_Girokontos,oekonomisch,"orig=— → cf=A13 (Δ=nan, p:—→0.524, y:—→1.0)"
90,Vermögen,oekonomisch,"orig=— → cf=A121 (Δ=nan, p:—→0.526, y:—→1.0)"
94,Beruf,sozio,"orig=— → cf=A171 (Δ=nan, p:—→0.506, y:—→1.0)"
95,Familienstand_Geschlecht,sozio,"orig=— → cf=A91 (Δ=nan, p:—→0.511, y:—→1.0)"
96,Telefon,sozio,"orig=— → cf=A191 (Δ=nan, p:—→0.536, y:—→1.0)"
97,Unterhaltspflichtige_Personen,sozio,"orig=— → cf=1.25 (Δ=0.25, p:—→0.514, y:—→1.0)"



— α-notwendige Features (Profil 5) —


Unnamed: 0,feature,gruppe,α (orig→cf)
100,Andere_Ratenverpflichtungen,oekonomisch,"orig=— → cf=A143 (Δ=nan, p:—→0.536, y:—→1.0)"
101,Anzahl_bestehender_Kredite,oekonomisch,"orig=— → cf=1.875 (Δ=0.125, p:—→0.505, y:—→1.0)"
102,Beschäftigt_seit,oekonomisch,"orig=— → cf=A72 (Δ=nan, p:—→0.518, y:—→1.0)"
103,Dauer_in_Monaten,oekonomisch,"orig=— → cf=21.75 (Δ=2.25, p:—→0.512, y:—→1.0)"
105,Kreditgeschichte,oekonomisch,"orig=— → cf=A32 (Δ=nan, p:—→0.517, y:—→1.0)"
106,Kreditverwendungszweck,oekonomisch,"orig=— → cf=A41 (Δ=nan, p:—→0.657, y:—→1.0)"
107,Ratenhöhe,oekonomisch,"orig=— → cf=3.625 (Δ=0.375, p:—→0.521, y:—→1.0)"
108,Sparkonto_Wertpapiere,oekonomisch,"orig=— → cf=A63 (Δ=nan, p:—→0.526, y:—→1.0)"
109,Status_des_Girokontos,oekonomisch,"orig=— → cf=A12 (Δ=nan, p:—→0.505, y:—→1.0)"
110,Vermögen,oekonomisch,"orig=— → cf=A121 (Δ=nan, p:—→0.612, y:—→1.0)"



— α-notwendige Features (Profil 6) —


Unnamed: 0,feature,gruppe,α (orig→cf)
123,Dauer_in_Monaten,oekonomisch,"orig=— → cf=11.25 (Δ=48.75, p:—→0.503, y:—→1.0)"



— α-notwendige Features (Profil 8) —


Unnamed: 0,feature,gruppe,α (orig→cf)
167,Ratenhöhe,oekonomisch,"orig=— → cf=3.625 (Δ=2.625, p:—→0.483, y:—→0.0)"



— α-notwendige Features (Profil 9) —


Unnamed: 0,feature,gruppe,α (orig→cf)
181,Anzahl_bestehender_Kredite,oekonomisch,"orig=— → cf=1.625 (Δ=0.625, p:—→0.492, y:—→0.0)"
188,Sparkonto_Wertpapiere,oekonomisch,"orig=— → cf=A63 (Δ=nan, p:—→0.497, y:—→0.0)"
192,Alter,sozio,"orig=— → cf=36.25 (Δ=18.75, p:—→0.498, y:—→0.0)"
194,Beruf,sozio,"orig=— → cf=A174 (Δ=nan, p:—→0.496, y:—→0.0)"
195,Familienstand_Geschlecht,sozio,"orig=— → cf=A92 (Δ=nan, p:—→0.465, y:—→0.0)"
