In [2]:
#!/usr/bin/env python3
"""
AI Ethics in Action — Privacy, Transparency, Accountability, Fairness (PTAF)

A compact, end-to-end, portfolio-friendly demo that:
  1) builds a synthetic dataset with identifiers + quasi-identifiers + a sensitive attribute
  2) applies privacy protections (pseudonymization + generalization + DP-like aggregates)
  3) trains an interpretable model and generates transparency artifacts (coefficients, reason codes, model card)
  4) logs accountability artifacts (audit log, dataset/model fingerprints, HITL escalation)
  5) evaluates fairness (subgroup metrics, disparate impact, equal opportunity) and applies a simple mitigation
  6) exports outputs to ./artifacts for easy GitHub sharing

Run:
  python ethics_in_action.py

Optional:
  python ethics_in_action.py --n 6000 --seed 7 --threshold 0.55

Dependencies:
  pip install numpy pandas scikit-learn
"""

from __future__ import annotations

import argparse
import hashlib
import json
import os
from dataclasses import asdict, dataclass
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder


# ----------------------------
# Utilities
# ----------------------------

def now_iso() -> str:
    return datetime.now().isoformat(timespec="seconds")


def ensure_dir(path: Path) -> None:
    path.mkdir(parents=True, exist_ok=True)


def write_json(path: Path, payload: Any) -> None:
    path.write_text(json.dumps(payload, indent=2, ensure_ascii=False), encoding="utf-8")


def write_jsonl(path: Path, events: List[Dict[str, Any]]) -> None:
    with path.open("a", encoding="utf-8") as f:
        for e in events:
            f.write(json.dumps(e, ensure_ascii=False) + "\n")


# ----------------------------
# Data Generation (Synthetic)
# ----------------------------

def make_synthetic_hiring_data(n: int, seed: int) -> pd.DataFrame:
    """
    Creates a synthetic 'hiring screen' dataset.

    Columns include:
      - direct identifiers: name, email (to demonstrate privacy controls)
      - quasi-identifiers: age, zipcode, education (re-identification risk)
      - features: experience_years, skill_score
      - sensitive attribute: group (0/1) used for fairness evaluation
      - label: recommended_for_review (0/1)

    Note: This is a demo dataset for learning and portfolio use.
    """
    rng = np.random.default_rng(seed)

    age = rng.integers(21, 60, size=n)
    zipcode = rng.choice([10001, 10002, 10003, 10004, 10005, 10006, 10007, 10008], size=n)
    education = rng.choice(
        ["HighSchool", "Bachelors", "Masters", "PhD"],
        size=n,
        p=[0.20, 0.45, 0.28, 0.07],
    )
    experience_years = np.clip(rng.normal(loc=(age - 21) / 3, scale=2.5, size=n), 0, 30)
    skill_score = np.clip(rng.normal(loc=65, scale=12, size=n), 0, 100)

    # Sensitive attribute (e.g., a protected demographic group)
    group = rng.binomial(1, 0.45, size=n)

    edu_map = {"HighSchool": 0, "Bachelors": 1, "Masters": 2, "PhD": 3}
    edu_num = np.array([edu_map[e] for e in education])

    # Signal from "legitimate" features
    base = 0.03 * skill_score + 0.09 * experience_years + 0.25 * edu_num

    # Introduce mild bias in label generation (for fairness demonstration)
    biased_score = base - 0.25 * group + rng.normal(0, 0.8, size=n)

    # Convert to probability and sample label
    prob = 1 / (1 + np.exp(-(biased_score - 3.4)))
    y = rng.binomial(1, prob, size=n)

    df = pd.DataFrame(
        {
            "name": [f"Candidate_{i}" for i in range(n)],
            "email": [f"user{i}@example.com" for i in range(n)],
            "age": age,
            "zipcode": zipcode,
            "education": education,
            "experience_years": np.round(experience_years, 1),
            "skill_score": np.round(skill_score, 1),
            "group": group,
            "recommended_for_review": y,
        }
    )
    return df


# ----------------------------
# Privacy
# ----------------------------

def pseudonymize(df: pd.DataFrame, salt: str, id_cols: Tuple[str, ...] = ("name", "email")) -> pd.DataFrame:
    """
    Privacy control: pseudonymization
      - create a stable pseudo_id using salted hashing
      - drop direct identifiers (name/email)
    """
    out = df.copy()

    def pid(email: str) -> str:
        return hashlib.sha256((salt + str(email)).encode("utf-8")).hexdigest()[:12]

    out["pseudo_id"] = out["email"].map(pid)
    out = out.drop(columns=list(id_cols))
    cols = ["pseudo_id"] + [c for c in out.columns if c != "pseudo_id"]
    return out[cols]


def generalize_quasi_identifiers(
    df: pd.DataFrame,
    k: int = 15,
    quasi: Tuple[str, ...] = ("age", "zipcode", "education"),
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Privacy control: generalization (k-anonymity style)
      - age -> age_band (5-year buckets)
      - zipcode -> zip_prefix (first 3 digits + "**")
      - keeps education as-is (often used as a quasi-identifier)
    Returns:
      - generalized dataframe
      - group size table for the quasi-identifier combinations
    """
    out = df.copy()

    out["age_band_start"] = (out["age"] // 5) * 5
    out["age_band"] = out["age_band_start"].astype(str) + "-" + (out["age_band_start"] + 4).astype(str)
    out["zip_prefix"] = out["zipcode"].astype(str).str[:3] + "**"

    # drop exact quasi-identifiers that increase re-identification risk
    out = out.drop(columns=["age", "zipcode", "age_band_start"])

    # group-size table
    group_sizes = (
        out.groupby(["age_band", "zip_prefix", "education"])
        .size()
        .reset_index(name="count")
        .sort_values("count")
    )
    # the caller can inspect how many groups fall below k
    return out, group_sizes


def dp_like_mean(values: np.ndarray, epsilon: float, value_range: Tuple[float, float], seed: int) -> float:
    """
    Privacy control: DP-like aggregate release (teaching demo)
      - sensitivity(mean) ≈ (max-min)/n
      - Laplace noise with scale = sensitivity/epsilon

    Notes:
      - This is NOT a production DP implementation.
      - It demonstrates the concept of adding calibrated noise before sharing aggregates.
    """
    rng = np.random.default_rng(seed)
    values = np.asarray(values, dtype=float)
    n = max(len(values), 1)
    sensitivity = (value_range[1] - value_range[0]) / n
    noise = rng.laplace(loc=0.0, scale=sensitivity / max(epsilon, 1e-9))
    return float(values.mean() + noise)


# ----------------------------
# Transparency
# ----------------------------

def build_pipeline(cat_cols: List[str], num_cols: List[str]) -> Pipeline:
    pre = ColumnTransformer(
        transformers=[
            ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
            ("num", "passthrough", num_cols),
        ]
    )
    clf = LogisticRegression(max_iter=2000)
    return Pipeline([("prep", pre), ("clf", clf)])


def feature_names_from_pipeline(pipe: Pipeline, cat_cols: List[str], num_cols: List[str]) -> List[str]:
    pre: ColumnTransformer = pipe.named_steps["prep"]
    ohe: OneHotEncoder = pre.named_transformers_["cat"]
    cat_names = list(ohe.get_feature_names_out(cat_cols))
    return cat_names + num_cols


def coefficient_table(pipe: Pipeline, cat_cols: List[str], num_cols: List[str]) -> pd.DataFrame:
    names = feature_names_from_pipeline(pipe, cat_cols, num_cols)
    coefs = pipe.named_steps["clf"].coef_[0]
    tbl = pd.DataFrame({"feature": names, "coef": coefs, "abs_coef": np.abs(coefs)})
    return tbl.sort_values("abs_coef", ascending=False).reset_index(drop=True)


def reason_codes(pipe: Pipeline, row_df: pd.DataFrame, cat_cols: List[str], num_cols: List[str], top_k: int = 5) -> pd.DataFrame:
    """
    Simple per-decision explanation:
      - for logistic regression, contribution ≈ feature_value * coefficient in transformed space
    """
    pre: ColumnTransformer = pipe.named_steps["prep"]
    clf: LogisticRegression = pipe.named_steps["clf"]

    x_vec = pre.transform(row_df)

    # x_vec might be sparse depending on sklearn; convert safely
    x_dense = x_vec.toarray() if hasattr(x_vec, "toarray") else np.asarray(x_vec)
    contrib = x_dense[0] * clf.coef_[0]

    names = feature_names_from_pipeline(pipe, cat_cols, num_cols)
    out = pd.DataFrame({"feature": names, "contribution": contrib, "abs": np.abs(contrib)})
    return out.sort_values("abs", ascending=False).head(top_k)[["feature", "contribution"]].reset_index(drop=True)


def make_model_card(
    model_name: str,
    intended_use: str,
    limitations: List[str],
    data_notes: str,
    metrics: Dict[str, float],
    ethical_notes: Dict[str, str],
) -> Dict[str, Any]:
    return {
        "model_name": model_name,
        "created_at": now_iso(),
        "intended_use": intended_use,
        "data_notes": data_notes,
        "limitations": limitations,
        "evaluation_metrics": metrics,
        "ethical_notes": ethical_notes,
    }


# ----------------------------
# Accountability
# ----------------------------

def df_fingerprint(df: pd.DataFrame) -> str:
    """
    Stable-ish fingerprint for dataset content.
    Useful for traceability: "which data version trained this model?"
    """
    h = pd.util.hash_pandas_object(df, index=True).values.tobytes()
    return hashlib.sha256(h).hexdigest()


def pipeline_fingerprint(pipe: Pipeline) -> str:
    """
    Fingerprint model configuration (parameters).
    For demos: captures a stable signature of the pipeline setup.
    """
    params = pipe.get_params(deep=True)
    blob = json.dumps(params, sort_keys=True, default=str).encode("utf-8")
    return hashlib.sha256(blob).hexdigest()


@dataclass
class AuditEvent:
    timestamp: str
    event_type: str
    payload: Dict[str, Any]


class Auditor:
    def __init__(self, audit_path: Path):
        self.audit_path = audit_path
        self.buffer: List[Dict[str, Any]] = []

    def log(self, event_type: str, payload: Dict[str, Any]) -> None:
        self.buffer.append({"timestamp": now_iso(), "event_type": event_type, "payload": payload})

    def flush(self) -> None:
        if self.buffer:
            write_jsonl(self.audit_path, self.buffer)
            self.buffer = []


def predict_with_audit(
    auditor: Auditor,
    pipe: Pipeline,
    row_df: pd.DataFrame,
    cat_cols: List[str],
    num_cols: List[str],
    reviewer: str,
    threshold: float,
) -> Dict[str, Any]:
    p = float(pipe.predict_proba(row_df)[0, 1])
    decision = "REVIEW" if p >= threshold else "NO_REVIEW"
    reasons = reason_codes(pipe, row_df, cat_cols, num_cols, top_k=4).to_dict(orient="records")

    auditor.log(
        "prediction",
        {
            "reviewer": reviewer,
            "threshold": threshold,
            "probability": p,
            "decision": decision,
            "input_features": row_df.to_dict(orient="records")[0],
            "reason_codes": reasons,
        },
    )
    return {"probability": p, "decision": decision, "reason_codes": reasons}


def hitl_policy(probability: float, low: float = 0.45, high: float = 0.65) -> str:
    """
    Human-in-the-loop escalation policy:
      - low confidence: auto NO_REVIEW
      - high confidence: auto REVIEW
      - ambiguous: escalate to human
    """
    if probability < low:
        return "AUTO_NO_REVIEW"
    if probability > high:
        return "AUTO_REVIEW"
    return "ESCALATE_TO_HUMAN"


# ----------------------------
# Fairness
# ----------------------------

def subgroup_report(y_true: np.ndarray, y_pred: np.ndarray, sensitive: np.ndarray) -> pd.DataFrame:
    rows = []
    for g in sorted(np.unique(sensitive)):
        idx = sensitive == g
        rows.append(
            {
                "group": int(g),
                "n": int(idx.sum()),
                "accuracy": float(accuracy_score(y_true[idx], y_pred[idx])),
                "precision": float(precision_score(y_true[idx], y_pred[idx], zero_division=0)),
                "recall": float(recall_score(y_true[idx], y_pred[idx], zero_division=0)),
                "selection_rate": float(y_pred[idx].mean()),
            }
        )
    return pd.DataFrame(rows)


def disparate_impact(y_pred: np.ndarray, sensitive: np.ndarray) -> Tuple[Optional[float], Dict[int, float]]:
    rates: Dict[int, float] = {}
    for g in sorted(np.unique(sensitive)):
        idx = sensitive == g
        rates[int(g)] = float(y_pred[idx].mean())
    if 0 in rates and rates[0] > 0 and 1 in rates:
        return float(rates[1] / rates[0]), rates
    return None, rates


def equal_opportunity_diff(y_true: np.ndarray, y_pred: np.ndarray, sensitive: np.ndarray) -> Tuple[float, Dict[int, float]]:
    tpr: Dict[int, float] = {}
    for g in sorted(np.unique(sensitive)):
        idx = sensitive == g
        tpr[int(g)] = float(recall_score(y_true[idx], y_pred[idx], zero_division=0))
    return float(tpr.get(1, 0.0) - tpr.get(0, 0.0)), tpr


def apply_group_thresholds(proba: np.ndarray, sensitive: np.ndarray, t0: float, t1: float) -> np.ndarray:
    out = np.zeros_like(proba, dtype=int)
    out[sensitive == 0] = (proba[sensitive == 0] >= t0).astype(int)
    out[sensitive == 1] = (proba[sensitive == 1] >= t1).astype(int)
    return out


def search_thresholds(
    proba: np.ndarray,
    y_true: np.ndarray,
    sensitive: np.ndarray,
    grid: np.ndarray,
) -> Dict[str, float]:
    """
    Simple grid search for thresholds that pull DI closer to 1.0
    while keeping accuracy reasonable.
    """
    best: Optional[Dict[str, float]] = None
    for t0 in grid:
        for t1 in grid:
            pred = apply_group_thresholds(proba, sensitive, float(t0), float(t1))
            di, _ = disparate_impact(pred, sensitive)
            if di is None:
                continue
            acc = float(accuracy_score(y_true, pred))
            score = -abs(di - 1.0) + 0.15 * acc  # lightweight objective for demo
            cand = {"t0": float(t0), "t1": float(t1), "di": float(di), "acc": acc, "score": float(score)}
            if best is None or cand["score"] > best["score"]:
                best = cand
    return best or {"t0": 0.5, "t1": 0.5, "di": float("nan"), "acc": 0.0, "score": float("-inf")}


# ----------------------------
# Main workflow
# ----------------------------

def main() -> None:
    parser = argparse.ArgumentParser(description="AI Ethics in Action (PTAF) demo.")
    parser.add_argument("--n", type=int, default=4000, help="Number of rows to generate.")
    parser.add_argument("--seed", type=int, default=42, help="Random seed.")
    parser.add_argument("--threshold", type=float, default=0.50, help="Decision threshold for REVIEW.")
    parser.add_argument("--epsilon", type=float, default=0.8, help="DP-like epsilon for aggregate demos.")
    parser.add_argument("--k", type=int, default=15, help="k for k-anonymity style generalization check.")
    args, _ = parser.parse_known_args()


    artifacts = Path("artifacts")
    ensure_dir(artifacts)

    # 1) Data
    df_raw = make_synthetic_hiring_data(n=args.n, seed=args.seed)
    df_raw.to_csv(artifacts / "raw_data.csv", index=False)

    # 2) Privacy: pseudonymize + generalize
    salt = f"PTAF_DEMO_SALT_{args.seed}"
    df_priv = pseudonymize(df_raw, salt=salt)
    df_priv.to_csv(artifacts / "pseudonymized_data.csv", index=False)

    df_gen, group_sizes = generalize_quasi_identifiers(df_priv, k=args.k)
    df_gen.to_csv(artifacts / "generalized_data.csv", index=False)
    group_sizes.to_csv(artifacts / "k_anonymity_group_sizes.csv", index=False)

    # DP-like aggregates
    dp_report = []
    for g in [0, 1]:
        mask = df_priv["group"].values == g
        real_mean = float(df_raw.loc[mask, "skill_score"].mean())
        dp_mean = dp_like_mean(
            values=df_raw.loc[mask, "skill_score"].values,
            epsilon=args.epsilon,
            value_range=(0.0, 100.0),
            seed=args.seed + 100 + g,
        )
        dp_report.append({"group": g, "real_mean_skill": real_mean, "dp_like_mean_skill": dp_mean})
    write_json(artifacts / "dp_like_aggregates.json", {"created_at": now_iso(), "epsilon": args.epsilon, "items": dp_report})

    # 3) Train interpretable model (transparency)
    # Use privacy-safe columns; do NOT use pseudo_id or group as features
    y = df_priv["recommended_for_review"].values.astype(int)
    sensitive = df_priv["group"].values.astype(int)

    X = df_priv.drop(columns=["recommended_for_review", "pseudo_id", "group"])
    cat_cols = ["education"]
    num_cols = [c for c in X.columns if c not in cat_cols]

    X_train, X_test, y_train, y_test, s_train, s_test = train_test_split(
        X, y, sensitive, test_size=0.25, random_state=args.seed, stratify=y
    )

    pipe = build_pipeline(cat_cols, num_cols)
    pipe.fit(X_train, y_train)

    proba = pipe.predict_proba(X_test)[:, 1]
    pred = (proba >= args.threshold).astype(int)

    metrics = {
        "accuracy": float(accuracy_score(y_test, pred)),
        "precision": float(precision_score(y_test, pred)),
        "recall": float(recall_score(y_test, pred)),
        "threshold": float(args.threshold),
    }
    write_json(artifacts / "metrics.json", {"created_at": now_iso(), "metrics": metrics})

    # Transparency artifacts: coefficients + sample explanations + model card
    coef_tbl = coefficient_table(pipe, cat_cols, num_cols)
    coef_tbl.to_csv(artifacts / "coefficients.csv", index=False)

    explanations = []
    sample_indices = np.linspace(0, len(X_test) - 1, num=min(12, len(X_test)), dtype=int)
    for idx in sample_indices:
        row = X_test.iloc[[idx]]
        p = float(pipe.predict_proba(row)[0, 1])
        reasons_tbl = reason_codes(pipe, row, cat_cols, num_cols, top_k=5)
        explanations.append(
            {
                "row_index": int(idx),
                "probability": p,
                "decision": "REVIEW" if p >= args.threshold else "NO_REVIEW",
                "reason_codes": reasons_tbl.to_dict(orient="records"),
                "input": row.to_dict(orient="records")[0],
            }
        )
    write_json(artifacts / "sample_explanations.json", {"created_at": now_iso(), "items": explanations})

    model_card = make_model_card(
        model_name="HiringScreen-PTAF-v1 (LogisticRegression)",
        intended_use="Assist with screening by recommending candidates for human review (not an automated hiring decision).",
        limitations=[
            "Synthetic dataset used for demonstration; not representative of real labor markets.",
            "Model behavior depends on feature definitions; requires domain review before any real use.",
            "Interpretability is limited to linear contributions; does not capture complex interactions.",
        ],
        data_notes="Trained on education, experience_years, skill_score; direct identifiers removed; sensitive attribute used for evaluation only.",
        metrics=metrics,
        ethical_notes={
            "privacy": "Direct identifiers removed; pseudonymous IDs used; quasi-identifiers generalized for sharing.",
            "transparency": "Coefficients and per-decision reason codes exported; model card included.",
            "accountability": "Audit logs + dataset/model fingerprints; HITL escalation policy demonstrated.",
            "fairness": "Subgroup metrics, disparate impact, and equal opportunity reported; threshold mitigation demonstrated.",
        },
    )
    write_json(artifacts / "model_card.json", model_card)

    # 4) Accountability: audit + fingerprints + HITL policy
    auditor = Auditor(audit_path=artifacts / "audit_log.jsonl")

    train_fprint = df_fingerprint(pd.concat([X_train.reset_index(drop=True), pd.Series(y_train, name="label")], axis=1))
    model_fprint = pipeline_fingerprint(pipe)

    auditor.log(
        "model_registry",
        {
            "model_name": model_card["model_name"],
            "dataset_fingerprint": train_fprint,
            "model_fingerprint": model_fprint,
            "metrics": metrics,
        },
    )

    # Log a few audited predictions
    for idx in sample_indices[:6]:
        row = X_test.iloc[[idx]]
        out = predict_with_audit(
            auditor=auditor,
            pipe=pipe,
            row_df=row,
            cat_cols=cat_cols,
            num_cols=num_cols,
            reviewer="demo_user",
            threshold=args.threshold,
        )
        auditor.log(
            "hitl_policy",
            {
                "row_index": int(idx),
                "probability": out["probability"],
                "action": hitl_policy(out["probability"]),
            },
        )

    auditor.flush()

    # 5) Fairness: baseline metrics + mitigation via threshold tuning
    base_subgroups = subgroup_report(y_test, pred, s_test)
    base_subgroups.to_csv(artifacts / "fairness_subgroup_metrics_baseline.csv", index=False)

    di, sel_rates = disparate_impact(pred, s_test)
    eod, tpr = equal_opportunity_diff(y_test, pred, s_test)

    fairness_summary = {
        "created_at": now_iso(),
        "baseline": {
            "threshold": float(args.threshold),
            "selection_rates": sel_rates,
            "disparate_impact_ratio_g1_over_g0": di,
            "tpr_by_group": tpr,
            "equal_opportunity_diff_g1_minus_g0": eod,
        },
    }

    # Mitigation: allow different thresholds per group (demo)
    grid = np.linspace(0.35, 0.65, 13)
    best = search_thresholds(proba, y_test, s_test, grid=grid)

    pred_m = apply_group_thresholds(proba, s_test, best["t0"], best["t1"])
    mitigated_subgroups = subgroup_report(y_test, pred_m, s_test)
    mitigated_subgroups.to_csv(artifacts / "fairness_subgroup_metrics_mitigated.csv", index=False)

    di2, sel_rates2 = disparate_impact(pred_m, s_test)
    eod2, tpr2 = equal_opportunity_diff(y_test, pred_m, s_test)

    fairness_summary["mitigated"] = {
        "thresholds": {"group0": best["t0"], "group1": best["t1"]},
        "accuracy": float(accuracy_score(y_test, pred_m)),
        "selection_rates": sel_rates2,
        "disparate_impact_ratio_g1_over_g0": di2,
        "tpr_by_group": tpr2,
        "equal_opportunity_diff_g1_minus_g0": eod2,
        "search_objective": {"score": best["score"], "baseline_acc": metrics["accuracy"]},
        "note": "This mitigation is for demonstration and must be governed carefully in real systems.",
    }

    write_json(artifacts / "fairness_summary.json", fairness_summary)

    # Final console summary
    print("\n=== PTAF Demo Complete ===")
    print(f"Artifacts written to: {artifacts.resolve()}")
    print("\nKey files:")
    for name in [
        "model_card.json",
        "coefficients.csv",
        "sample_explanations.json",
        "audit_log.jsonl",
        "fairness_summary.json",
        "fairness_subgroup_metrics_baseline.csv",
        "fairness_subgroup_metrics_mitigated.csv",
        "dp_like_aggregates.json",
        "k_anonymity_group_sizes.csv",
    ]:
        p = artifacts / name
        if p.exists():
            print(f" - {name}")

    print("\nHeadline metrics:")
    print(f" - Accuracy:  {metrics['accuracy']:.3f}")
    print(f" - Precision: {metrics['precision']:.3f}")
    print(f" - Recall:    {metrics['recall']:.3f}")
    print("\nFairness (baseline):")
    print(f" - Disparate Impact (g1/g0): {di if di is not None else 'n/a'}")
    print(f" - Equal Opportunity Diff:   {eod:.3f}")
    print("\nFairness (mitigated demo):")
    print(f" - Thresholds: group0={best['t0']:.2f}, group1={best['t1']:.2f}")
    print(f" - Disparate Impact (g1/g0): {di2 if di2 is not None else 'n/a'}")
    print(f" - Equal Opportunity Diff:   {eod2:.3f}")
    print("")


if __name__ == "__main__":
    main()



=== PTAF Demo Complete ===
Artifacts written to: D:\Scripts\Python Scripts\Responsible AI\artifacts

Key files:
 - model_card.json
 - coefficients.csv
 - sample_explanations.json
 - audit_log.jsonl
 - fairness_summary.json
 - fairness_subgroup_metrics_baseline.csv
 - fairness_subgroup_metrics_mitigated.csv
 - dp_like_aggregates.json
 - k_anonymity_group_sizes.csv

Headline metrics:
 - Accuracy:  0.620
 - Precision: 0.455
 - Recall:    0.180

Fairness (baseline):
 - Disparate Impact (g1/g0): 1.0408850408850407
 - Equal Opportunity Diff:   0.025

Fairness (mitigated demo):
 - Thresholds: group0=0.45, group1=0.45
 - Disparate Impact (g1/g0): 1.0027314408350405
 - Equal Opportunity Diff:   0.024

