#!/usr/bin/env python3
"""
AI Ethics in Action — Privacy, Transparency, Accountability, Fairness (PTAF)

This script is intentionally written as a "teaching + portfolio" artifact:
- It is runnable end-to-end
- It exports artifacts to ./artifacts so you can share results on GitHub
- It includes detailed, practical comments explaining WHY each step exists

What you get:
  1) A synthetic dataset with direct identifiers + quasi-identifiers + a sensitive attribute
  2) Privacy controls:
       - pseudonymization (salted hashing of identifiers)
       - generalization of quasi-identifiers (k-anonymity style check)
       - DP-like noisy aggregates (concept demo)
  3) Transparency artifacts:
       - interpretable logistic regression coefficients
       - per-decision "reason codes"
       - a simple model card (documentation)
  4) Accountability artifacts:
       - audit log (JSONL)
       - dataset/model fingerprints
       - a human-in-the-loop escalation policy
  5) Fairness evaluation and a simple mitigation demo:
       - subgroup metrics
       - disparate impact ratio
       - equal opportunity difference
       - threshold tuning (demo mitigation)

Run:
  python ethics_in_action.py

Optional:
  python ethics_in_action.py --n 6000 --seed 7 --threshold 0.55

Dependencies:
  pip install numpy pandas scikit-learn
"""

from __future__ import annotations

import argparse
import hashlib
import json
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder


# ======================================================================================
# Utilities
# ======================================================================================

def now_iso() -> str:
    """
    Produce the current wall-clock time as an ISO 8601 string.

    Why it exists:
    - Every exported artifact (model card, fairness report, audit log entry)
      carries a timestamp so it can be traced later.

    Notes:
    - The value comes from datetime.now() with no tzinfo, so it is a naive
      local time and carries no UTC offset.
    - Sub-second precision is discarded.

    Example output:
    - "2026-01-03T22:15:10"
    """
    # With microseconds zeroed, isoformat() emits exactly "YYYY-MM-DDTHH:MM:SS".
    whole_second = datetime.now().replace(microsecond=0)
    return whole_second.isoformat()


def ensure_dir(path: Path) -> None:
    """
    Make sure `path` exists as a directory, creating missing parents as needed.

    Why it exists:
    - The script writes its outputs (CSVs/JSONs/logs) into a folder that may
      not exist on a fresh checkout; calling this first makes later writes safe.

    Behavior:
    - An already existing directory is left untouched.
    - If `path` exists but is not a directory, mkdir raises FileExistsError.
    """
    if path.is_dir():
        return
    path.mkdir(parents=True, exist_ok=True)


def write_json(path: Path, payload: Any) -> None:
    """
    Serialize `payload` as indented JSON and store it at `path` (UTF-8).

    Why it exists:
    - Pretty-printed JSON is easy to read and diff when artifacts are shared.
    - ensure_ascii=False keeps non-ASCII text readable instead of \\uXXXX escapes.

    Behavior:
    - The payload is serialized before the file is opened, so a payload that
      cannot be serialized does not truncate an existing file.
    - An existing file at `path` is overwritten.
    """
    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
    with path.open("w", encoding="utf-8") as handle:
        handle.write(serialized)


def write_jsonl(path: Path, events: List[Dict[str, Any]]) -> None:
    """
    Append events to a JSON Lines file (one JSON record per line).

    Why JSONL (not JSON)?
    - Audit logs are typically append-only.
    - JSONL is ideal for log pipelines: each line is self-contained and can be streamed.

    All-or-nothing serialization:
    - Every event is serialized BEFORE the file is opened. If any event cannot
      be encoded (json.dumps raises TypeError/ValueError), nothing is appended.
    - This matters for callers that keep their events and retry after a failure:
      a partially written batch would otherwise be duplicated on the retry.

    Args:
        path: Destination file; opened in append mode with UTF-8 encoding
            (created if missing, even when `events` is empty).
        events: JSON-serializable dictionaries, written in order.

    Raises:
        TypeError / ValueError: propagated from json.dumps for an event that
            cannot be serialized; the file is not touched in that case.

    Example:
    {"timestamp": "...", "event_type": "...", "payload": {...}}
    {"timestamp": "...", "event_type": "...", "payload": {...}}
    """
    # Serialize first so a bad event cannot leave a half-written batch behind.
    lines = [json.dumps(event, ensure_ascii=False) + "\n" for event in events]
    with path.open("a", encoding="utf-8") as handle:
        handle.writelines(lines)


# ======================================================================================
# Data Generation (Synthetic)
# ======================================================================================

def make_synthetic_hiring_data(n: int, seed: int) -> pd.DataFrame:
    """
    Build a fully synthetic "hiring screening" table with `n` rows.

    Purpose:
    - Provide data that can be published without privacy risk while still
      containing the kinds of columns a privacy/fairness review cares about.

    Columns produced:
    - name / email: direct identifiers ("Candidate_<i>", "user<i>@example.com")
    - age / zipcode / education: quasi-identifiers
    - experience_years / skill_score: predictive features (rounded to 1 decimal)
    - group: binary sensitive attribute, intended for fairness evaluation only
    - recommended_for_review: binary outcome label

    Important ethics note:
    - The label is sampled from a score that subtracts a fixed penalty for
      group == 1. This deliberately injected bias lets the fairness section
      show disparities even though `group` is not a model feature.

    Reproducibility:
    - All randomness comes from one generator seeded with `seed`; the order of
      the draws below determines the output, so it must not be rearranged.
    """
    rng = np.random.default_rng(seed)

    # --- Quasi-identifiers -------------------------------------------------
    ages = rng.integers(21, 60, size=n)  # upper bound is exclusive: 21..59
    zip_pool = list(range(10001, 10009))
    zipcodes = rng.choice(zip_pool, size=n)
    degree_levels = ["HighSchool", "Bachelors", "Masters", "PhD"]
    degrees = rng.choice(degree_levels, size=n, p=[0.20, 0.45, 0.28, 0.07])

    # --- Predictive features -----------------------------------------------
    # Experience is centred on roughly one year per three years of age past 21.
    expected_experience = (ages - 21) / 3
    experience = np.clip(rng.normal(loc=expected_experience, scale=2.5, size=n), 0, 30)
    skill = np.clip(rng.normal(loc=65, scale=12, size=n), 0, 100)

    # --- Sensitive attribute (demo only) -----------------------------------
    membership = rng.binomial(1, 0.45, size=n)

    # --- Label generation --------------------------------------------------
    # Education enters the score as its position in the ordered level list.
    degree_rank = {level: rank for rank, level in enumerate(degree_levels)}
    degree_score = np.array([degree_rank[level] for level in degrees])

    # Legitimate signal: skill, experience and education.
    merit = 0.03 * skill + 0.09 * experience + 0.25 * degree_score

    # Historical-bias simulation: group 1 loses 0.25 before noise is added.
    noise = rng.normal(0, 0.8, size=n)
    latent = merit - 0.25 * membership + noise

    # Logistic link, then a Bernoulli draw per row.
    acceptance_prob = 1 / (1 + np.exp(-(latent - 3.4)))
    labels = rng.binomial(1, acceptance_prob, size=n)

    columns = {
        "name": [f"Candidate_{i}" for i in range(n)],
        "email": [f"user{i}@example.com" for i in range(n)],
        "age": ages,
        "zipcode": zipcodes,
        "education": degrees,
        "experience_years": np.round(experience, 1),
        "skill_score": np.round(skill, 1),
        "group": membership,
        "recommended_for_review": labels,
    }
    return pd.DataFrame(columns)


# ======================================================================================
# Privacy
# ======================================================================================

def pseudonymize(df: pd.DataFrame, salt: str, id_cols: Tuple[str, ...] = ("name", "email")) -> pd.DataFrame:
    """
    Privacy Control #1: Pseudonymization

    What it does:
    - Derives a `pseudo_id` for every row from the `email` column:
      the first 12 hex characters of SHA-256(salt + email).
    - Removes the columns listed in `id_cols` (direct identifiers).
    - Returns a new frame with `pseudo_id` as the first column; the input
      frame is not modified.

    Why this matters:
    - Names and e-mail addresses identify a person directly.
    - A stable pseudonym still allows record linking and debugging inside
      the system without keeping the raw identifier around.

    Why a salt:
    - An unsalted hash of an e-mail can be reproduced by anyone who guesses
      the address; mixing in a secret salt blocks that dictionary lookup.

    Note:
    - The pseudonym is always computed from the "email" column, regardless of
      what `id_cols` contains, so that column must be present in `df`.
    """
    def _salted_digest(address: str) -> str:
        hasher = hashlib.sha256()
        hasher.update(salt.encode("utf-8"))
        hasher.update(str(address).encode("utf-8"))
        return hasher.hexdigest()[:12]

    working = df.copy()
    working["pseudo_id"] = working["email"].map(_salted_digest)
    working = working.drop(columns=list(id_cols))

    # Lead with the pseudonym so exported tables are easy to scan.
    trailing = [name for name in working.columns if name != "pseudo_id"]
    return working[["pseudo_id", *trailing]]


def generalize_quasi_identifiers(
    df: pd.DataFrame,
    k: int = 15,
    quasi: Tuple[str, ...] = ("age", "zipcode", "education"),
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Privacy Control #2: Generalization of quasi-identifiers (k-anonymity style check)

    Background:
    - Even if you remove direct identifiers, people can sometimes be re-identified
      using combinations of quasi-identifiers like:
        (age, zipcode, education)

    What we do (for every column named in `quasi`):
    - "age"     -> replaced by `age_band` (5-year bucket, e.g. 23 -> "20-24")
    - "zipcode" -> replaced by `zip_prefix` (first 3 digits + "**")
    - any other column (e.g. "education") is kept as-is and used for grouping
      (in real systems, it may also be generalized depending on risk)

    Then we compute:
    - the number of records for each combination of the generalized
      quasi-identifiers, and whether that count falls below `k`

    How to interpret k:
    - k=15 means "each unique quasi-identifier combination should have at least 15 records"
      to reduce re-identification risk.
    - If many groups are flagged `below_k`, the data is not safe to release at
      that granularity.

    Args:
        df: Input table; it is copied, never modified in place.
        k: Minimum acceptable group size; used to fill the `below_k` flag.
        quasi: Names of the quasi-identifier columns to generalize / group by.
            Columns not listed here are left untouched.

    Returns:
    - generalized dataframe (safer for sharing)
    - group_sizes table sorted by ascending `count`, with one row per
      quasi-identifier combination and the columns
      [<group keys>..., "count", "below_k"]
    """
    out = df.copy()
    group_keys: List[str] = []

    for col in quasi:
        if col == "age":
            # Generalize age: 23 -> "20-24", 38 -> "35-39", etc.
            band_start = (out["age"] // 5) * 5
            out["age_band"] = band_start.astype(str) + "-" + (band_start + 4).astype(str)
            # Drop the precise value that increases re-identification risk.
            out = out.drop(columns=["age"])
            group_keys.append("age_band")
        elif col == "zipcode":
            # Generalize zipcode: 10006 -> "100**".
            # ZIP codes stored as integers lose leading zeros (02134 -> 2134);
            # pad back to five digits so the 3-digit prefix is the real one.
            zip_text = out["zipcode"].astype(str).str.zfill(5)
            out["zip_prefix"] = zip_text.str[:3] + "**"
            out = out.drop(columns=["zipcode"])
            group_keys.append("zip_prefix")
        else:
            group_keys.append(col)

    # Count group sizes across quasi-identifier combinations (smallest first,
    # because the small groups are the risky ones).
    group_sizes = (
        out.groupby(group_keys)
        .size()
        .reset_index(name="count")
        .sort_values("count")
    )
    # The actual k-anonymity check: flag every combination with fewer than k rows.
    group_sizes["below_k"] = group_sizes["count"] < k
    return out, group_sizes


def dp_like_mean(values: np.ndarray, epsilon: float, value_range: Tuple[float, float], seed: int) -> float:
    """
    Privacy Control #3: DP-like noisy aggregate release (educational demo)

    What problem this addresses:
    - Sharing aggregates like "average skill score by group" can leak information,
      especially for small groups.
    - Differential Privacy (DP) is a formal approach where you add noise calibrated
      to how much a single individual's data can change the output.

    What this function implements:
    - A teaching approximation for the DP mean using Laplace noise.
    - Values are first clipped into `value_range`. Without clipping, a single
      out-of-range value could move the mean by more than the sensitivity
      bound below, and the noise would be calibrated to the wrong scale.
    - Sensitivity of mean ~ (max-min)/n
      Explanation:
        If one person's value changes from min to max, the mean can shift by (max-min)/n

    Noise:
    - Laplace noise scale = sensitivity / epsilon
    - Larger epsilon -> less noise (less privacy, more accuracy)
    - Smaller epsilon -> more noise (more privacy, less accuracy)
    - epsilon values below 1e-9 (including zero and negatives) are treated as 1e-9.
    - The noise is drawn from a generator seeded with `seed`, so the same
      inputs and seed always give the same result (useful for a demo; a real
      release must not use predictable noise).

    Important disclaimer:
    - This is NOT a production DP system.
    - Production DP requires careful accounting, composition rules, and privacy budgets.

    Args:
        values: Numeric observations; flattened before averaging.
        epsilon: Privacy parameter controlling the noise scale.
        value_range: (min, max) bounds that values are clipped to.
        seed: Seed for the noise generator.

    Returns:
    - noisy mean that demonstrates the concept of privacy-preserving aggregate sharing
    - NaN when `values` is empty (there is nothing to average)

    Raises:
        ValueError: if value_range is reversed (max < min).
    """
    lower, upper = value_range
    if upper < lower:
        raise ValueError("value_range must be given as (min, max) with min <= max")

    data = np.asarray(values, dtype=float).ravel()
    if data.size == 0:
        # Explicit instead of relying on numpy's warning-emitting empty mean.
        return float("nan")

    # Enforce the bounds the sensitivity calculation depends on.
    clipped = np.clip(data, lower, upper)
    sensitivity = (upper - lower) / clipped.size

    rng = np.random.default_rng(seed)
    noise = rng.laplace(loc=0.0, scale=sensitivity / max(epsilon, 1e-9))
    return float(clipped.mean() + noise)


# ======================================================================================
# Transparency
# ======================================================================================

def build_pipeline(cat_cols: List[str], num_cols: List[str]) -> Pipeline:
    """
    Assemble the (unfitted) preprocessing + classifier pipeline.

    Why a pipeline:
    - Preprocessing and the model travel together, so the exact same
      transformations are applied at training and at prediction time.

    Why logistic regression:
    - Its coefficients can be read directly (sign = direction, size = strength),
      which the transparency helpers in this file rely on.

    Steps:
    - "prep": ColumnTransformer
        - "cat": one-hot encoding of `cat_cols`; categories unseen during
          fitting are ignored instead of raising
        - "num": `num_cols` passed through unchanged (no scaling)
    - "clf": LogisticRegression with max_iter=2000
    """
    encoder = OneHotEncoder(handle_unknown="ignore")
    column_steps = [
        ("cat", encoder, cat_cols),
        ("num", "passthrough", num_cols),
    ]
    preprocessor = ColumnTransformer(transformers=column_steps)
    classifier = LogisticRegression(max_iter=2000)
    return Pipeline(steps=[("prep", preprocessor), ("clf", classifier)])


def feature_names_from_pipeline(pipe: Pipeline, cat_cols: List[str], num_cols: List[str]) -> List[str]:
    """
    List the feature names as they look after preprocessing.

    Why this exists:
    - One-hot encoding turns one categorical column into several columns
      (education -> education_Bachelors, education_Masters, ...).
    - Coefficients can only be interpreted once each one is matched to its
      expanded feature name.

    Ordering:
    - One-hot names (from the fitted "cat" encoder) come first, followed by
      `num_cols`, mirroring the transformer order declared in the pipeline.

    Requires a fitted pipeline: the "cat" encoder is looked up through
    `named_transformers_`.
    """
    encoder: OneHotEncoder = pipe.named_steps["prep"].named_transformers_["cat"]
    names = list(encoder.get_feature_names_out(cat_cols))
    names.extend(num_cols)
    return names


def coefficient_table(pipe: Pipeline, cat_cols: List[str], num_cols: List[str]) -> pd.DataFrame:
    """
    Tabulate the classifier's coefficients, strongest first.

    Reading a logistic regression coefficient:
    - sign > 0 pushes the prediction toward label=1, sign < 0 away from it
    - the absolute value expresses how strongly the feature pulls

    Why it supports transparency:
    - Reviewers can see at a glance which features drive decisions and spot
      suspicious proxies (e.g., ZIP code standing in for a sensitive trait).

    Output columns:
    - feature: name of the transformed feature
    - coef: signed coefficient (first row of the classifier's coef_)
    - abs_coef: absolute value, used as the descending sort key
    """
    weights = pipe.named_steps["clf"].coef_[0]
    table = pd.DataFrame(
        {
            "feature": feature_names_from_pipeline(pipe, cat_cols, num_cols),
            "coef": weights,
        }
    )
    table["abs_coef"] = table["coef"].abs()
    ranked = table.sort_values("abs_coef", ascending=False)
    return ranked.reset_index(drop=True)


def reason_codes(pipe: Pipeline, row_df: pd.DataFrame, cat_cols: List[str], num_cols: List[str], top_k: int = 5) -> pd.DataFrame:
    """
    Explain one prediction with its most influential features ("reason codes").

    Goal:
    - Answer "which factors mattered most for THIS decision?"

    How it works (approximation):
    - The row is run through the fitted preprocessing step only.
    - Because the model is linear in the transformed features, each feature's
      contribution is taken as value * coefficient.
    - Features are ranked by the absolute size of that contribution.

    Notes:
    - Only the first row of `row_df` is explained.
    - The intercept is not part of any contribution.

    Output:
    - DataFrame with columns ["feature", "contribution"], at most `top_k`
      rows, largest absolute contribution first.
    """
    preprocessor: ColumnTransformer = pipe.named_steps["prep"]
    model: LogisticRegression = pipe.named_steps["clf"]

    transformed = preprocessor.transform(row_df)
    # The transformer may hand back a sparse matrix; densify before indexing.
    if hasattr(transformed, "toarray"):
        dense = transformed.toarray()
    else:
        dense = np.asarray(transformed)

    contributions = dense[0] * model.coef_[0]
    frame = pd.DataFrame(
        {
            "feature": feature_names_from_pipeline(pipe, cat_cols, num_cols),
            "contribution": contributions,
            "abs": np.abs(contributions),
        }
    )
    strongest = frame.sort_values("abs", ascending=False).head(top_k)
    return strongest[["feature", "contribution"]].reset_index(drop=True)


def make_model_card(
    model_name: str,
    intended_use: str,
    limitations: List[str],
    data_notes: str,
    metrics: Dict[str, float],
    ethical_notes: Dict[str, str],
) -> Dict[str, Any]:
    """
    Bundle the model's documentation into a single dictionary (a "model card").

    Why model cards matter (Transparency + Accountability):
    - They record, in one place:
        - what the model is meant to be used for
        - what data it was trained on
        - how it performed
        - known limitations and ethical considerations
    - Governance reviews can then rely on a fixed, shareable description.

    Output:
    - A dictionary with the keys model_name, created_at (timestamp taken at
      call time), intended_use, data_notes, limitations, evaluation_metrics
      and ethical_notes. The inputs are stored as passed, not copied.
    """
    card: Dict[str, Any] = {"model_name": model_name, "created_at": now_iso()}
    card["intended_use"] = intended_use
    card["data_notes"] = data_notes
    card["limitations"] = limitations
    card["evaluation_metrics"] = metrics
    card["ethical_notes"] = ethical_notes
    return card


# ======================================================================================
# Accountability
# ======================================================================================

def df_fingerprint(df: pd.DataFrame) -> str:
    """
    Create a fingerprint (hash) of a dataframe's schema and content.

    Why it matters:
    - Accountability requires traceability: "Which dataset version trained this model?"
    - A fingerprint lets you later prove:
        - whether the training data changed
        - whether a model was trained on the expected data

    How it's done:
    - The column names and dtypes are serialized and hashed first. Row hashes
      alone depend only on the values, so two tables with identical values
      but different column labels (e.g. swapped feature/label names) would
      otherwise share a fingerprint.
    - hash_pandas_object creates row-wise hashes (index included).
    - Both parts are fed into one SHA-256 digest.

    Returns:
    - 64-character hexadecimal SHA-256 digest.
    """
    schema = json.dumps(
        [[str(name), str(dtype)] for name, dtype in zip(df.columns, df.dtypes)]
    )
    digest = hashlib.sha256()
    digest.update(schema.encode("utf-8"))

    row_hashes = pd.util.hash_pandas_object(df, index=True)
    digest.update(row_hashes.to_numpy().tobytes())
    return digest.hexdigest()


def pipeline_fingerprint(pipe: Pipeline) -> str:
    """
    Hash the pipeline's configuration into a hex fingerprint.

    Why it matters:
    - When results change between runs you want to know whether the model
      setup changed: hyperparameters, preprocessing steps, encoder settings.
    - Recording this value next to each run supports reproducibility and audits.

    What it captures:
    - Everything returned by get_params(deep=True), dumped as JSON with sorted
      keys. Values JSON cannot encode natively are converted with str().
    - It is computed from parameters only, not from fitted attributes such as
      learned coefficients.

    Returns:
    - SHA-256 hex digest of the serialized parameters.
    """
    config = pipe.get_params(deep=True)
    serialized = json.dumps(config, sort_keys=True, default=str)

    digest = hashlib.sha256()
    digest.update(serialized.encode("utf-8"))
    return digest.hexdigest()


@dataclass
class AuditEvent:
    """
    A structured audit event.

    Why use structured events:
    - Easier to parse and analyze than unstructured text logs.
    - Consistent format makes it easier to build dashboards or compliance reports later.

    Fields:
    - timestamp: when the event occurred
    - event_type: category (prediction, registry, hitl_policy, etc.)
    - payload: structured details

    Note:
    - Auditor.log in this file records plain dictionaries with these same
      three keys rather than instances of this class, so this dataclass acts
      as the schema description of one audit-log line.
    """
    # When the event occurred, as a string.
    timestamp: str
    # Category label for the event, e.g. "prediction".
    event_type: str
    # Free-form structured details for this event.
    payload: Dict[str, Any]


class Auditor:
    """
    Collects audit events in memory and appends them to a JSONL file on demand.

    How it works:
    - log() stamps an event with the current time and queues it in self.buffer
    - flush() hands the queued events to write_jsonl (append mode) and starts
      a fresh, empty buffer

    Why append-only:
    - An audit trail should only ever grow; earlier entries are never rewritten.
    - One JSON object per line works well with streaming/log tooling.

    Note:
    - Events exist only in memory until flush() is called.
    """
    def __init__(self, audit_path: Path):
        # Destination of the JSONL audit log.
        self.audit_path = audit_path
        # Events recorded since the last successful flush.
        self.buffer: List[Dict[str, Any]] = []

    def log(self, event_type: str, payload: Dict[str, Any]) -> None:
        """Queue one timestamped event; nothing is written to disk yet."""
        record = {"timestamp": now_iso(), "event_type": event_type, "payload": payload}
        self.buffer.append(record)

    def flush(self) -> None:
        """Append all queued events to the audit file, then reset the queue."""
        if not self.buffer:
            return
        write_jsonl(self.audit_path, self.buffer)
        # Reached only if the write did not raise, so events are kept on failure.
        self.buffer = []


def predict_with_audit(
    auditor: Auditor,
    pipe: Pipeline,
    row_df: pd.DataFrame,
    cat_cols: List[str],
    num_cols: List[str],
    reviewer: str,
    threshold: float,
) -> Dict[str, Any]:
    """
    Score one candidate row and record the decision in the audit trail.

    What gets logged (event type "prediction"):
    - reviewer: who asked for the prediction
    - threshold: the policy cut-off applied
    - probability and the resulting decision ("REVIEW" / "NO_REVIEW")
    - input_features: the first row of `row_df` as a dict
    - reason_codes: the top 4 feature contributions

    Why it matters:
    - An accountable system must be able to reconstruct later what was
      predicted, why, and under which policy settings.

    Decision rule:
    - "REVIEW" when the positive-class probability is >= threshold,
      otherwise "NO_REVIEW".

    Return:
    - {"probability": ..., "decision": ..., "reason_codes": [...]}

    Note:
    - The event is only queued on the auditor; flushing it to disk is the
      caller's responsibility.
    """
    probability = float(pipe.predict_proba(row_df)[0, 1])
    if probability >= threshold:
        decision = "REVIEW"
    else:
        decision = "NO_REVIEW"

    explanation = reason_codes(pipe, row_df, cat_cols, num_cols, top_k=4).to_dict(orient="records")

    audit_payload = {
        "reviewer": reviewer,
        "threshold": threshold,
        "probability": probability,
        "decision": decision,
        "input_features": row_df.to_dict(orient="records")[0],
        "reason_codes": explanation,
    }
    auditor.log("prediction", audit_payload)

    return {"probability": probability, "decision": decision, "reason_codes": explanation}


def hitl_policy(probability: float, low: float = 0.45, high: float = 0.65) -> str:
    """
    Decide whether a prediction may be automated or needs a human (HITL).

    Why HITL is important:
    - High-impact decisions should not be left entirely to a model.
    - A common compromise: automate the clear cases, send the uncertain
      middle band to a person.

    Policy:
    - probability < low   -> "AUTO_NO_REVIEW"
    - probability > high  -> "AUTO_REVIEW"
    - anything else       -> "ESCALATE_TO_HUMAN"
      (this includes probabilities exactly equal to `low` or `high`)

    The `low` test is evaluated before the `high` test.

    This demonstrates accountability because the governance rule is explicit
    and inspectable instead of being hidden inside the model.
    """
    verdict = "ESCALATE_TO_HUMAN"
    if probability < low:
        verdict = "AUTO_NO_REVIEW"
    elif probability > high:
        verdict = "AUTO_REVIEW"
    return verdict


# ======================================================================================
# Fairness
# ======================================================================================

def subgroup_report(y_true: np.ndarray, y_pred: np.ndarray, sensitive: np.ndarray) -> pd.DataFrame:
    """
    Break model performance down by sensitive group.

    Why subgroup metrics matter:
    - A single overall score can look fine while one group is served much
      worse than another.

    One row per distinct value in `sensitive` (ascending), with:
    - group: the group value as an int
    - n: number of records in the group
    - accuracy: share of correct predictions
    - precision: share of predicted positives that are truly positive
    - recall (TPR): share of true positives that were predicted positive
    - selection_rate: share of records predicted positive

    Precision and recall are reported as 0 when they are undefined
    (zero_division=0).

    selection_rate is the key impact signal: a large gap between groups may
    indicate disparate impact.
    """
    def _metrics_for(group_value: Any) -> Dict[str, Any]:
        members = sensitive == group_value
        truth, guess = y_true[members], y_pred[members]
        return {
            "group": int(group_value),
            "n": int(members.sum()),
            "accuracy": float(accuracy_score(truth, guess)),
            "precision": float(precision_score(truth, guess, zero_division=0)),
            "recall": float(recall_score(truth, guess, zero_division=0)),
            "selection_rate": float(guess.mean()),
        }

    return pd.DataFrame([_metrics_for(g) for g in sorted(np.unique(sensitive))])


def disparate_impact(y_pred: np.ndarray, sensitive: np.ndarray) -> Tuple[Optional[float], Dict[int, float]]:
    """
    Compute the Disparate Impact (DI) ratio between group 1 and group 0.

    Definitions:
    - selection_rate(group) = mean of `y_pred` over that group's records
    - DI = selection_rate(group 1) / selection_rate(group 0)

    Interpretation:
    - DI close to 1.0 -> both groups are selected at similar rates
    - DI < 1.0        -> group 1 is selected less often than group 0
    - DI > 1.0        -> group 1 is selected more often than group 0

    Why it matters:
    - DI is a widely used impact metric. It does not prove discrimination,
      but values far from 1.0 are a strong signal to investigate.

    Return:
    - (di_ratio, selection_rates)
      - di_ratio is None when group 0 or group 1 is absent, or when group 0's
        selection rate is not greater than zero (the ratio would be undefined)
      - selection_rates maps each group (as int) to its rate, e.g. {0: 0.42, 1: 0.31}
    """
    rates: Dict[int, float] = {
        int(g): float(y_pred[sensitive == g].mean())
        for g in sorted(np.unique(sensitive))
    }

    reference = rates.get(0)
    # Bail out whenever the ratio cannot be formed safely.
    if reference is None or 1 not in rates or not reference > 0:
        return None, rates
    return float(rates[1] / reference), rates


def equal_opportunity_diff(y_true: np.ndarray, y_pred: np.ndarray, sensitive: np.ndarray) -> Tuple[float, Dict[int, float]]:
    """
    Compute the Equal Opportunity Difference (EOD) between group 1 and group 0.

    Definitions:
    - TPR(group) = recall within that group
                 = P(predicted positive | actually positive, group)
    - EOD = TPR(group 1) - TPR(group 0)

    Interpretation:
    - EOD close to 0 -> truly positive cases are recognised equally often
    - EOD > 0        -> group 1 has the higher TPR
    - EOD < 0        -> group 1 has the lower TPR (more missed positives)

    Why it matters:
    - Disparate impact looks at outcome rates; equal opportunity looks at
      errors among the people who actually deserved a positive outcome.

    Edge cases visible in the code:
    - An undefined recall is reported as 0 (zero_division=0).
    - A group that does not occur in `sensitive` contributes a TPR of 0.0 to
      the difference.

    Return:
    - (difference, tpr_by_group)
    """
    tpr: Dict[int, float] = {}
    for group_value in sorted(np.unique(sensitive)):
        members = sensitive == group_value
        group_recall = recall_score(y_true[members], y_pred[members], zero_division=0)
        tpr[int(group_value)] = float(group_recall)

    gap = tpr.get(1, 0.0) - tpr.get(0, 0.0)
    return float(gap), tpr


def apply_group_thresholds(proba: np.ndarray, sensitive: np.ndarray, t0: float, t1: float) -> np.ndarray:
    """
    Turn probabilities into 0/1 decisions using a separate cut-off per group.

    Why this exists:
    - It demonstrates a simple fairness mitigation: adjust the decision
      threshold per group to reduce disparity.

    Important warning:
    - Group-specific thresholds mean group membership is used explicitly at
      decision time. In real systems that requires legal and ethical review.

    How it works:
    - group == 0: positive when probability >= t0
    - group == 1: positive when probability >= t1
    - records belonging to any other group value stay 0

    Returns:
    - integer array of 0/1 predictions with the same shape as `proba`
    """
    decisions = np.zeros_like(proba, dtype=int)
    for group_value, cutoff in ((0, t0), (1, t1)):
        members = sensitive == group_value
        decisions[members] = (proba[members] >= cutoff).astype(int)
    return decisions


def search_thresholds(
    proba: np.ndarray,
    y_true: np.ndarray,
    sensitive: np.ndarray,
    grid: np.ndarray,
) -> Dict[str, float]:
    """
    Grid-search the per-group thresholds (t0, t1) that best balance DI and accuracy.

    Why grid search:
    - The search space is small and fully inspectable, which keeps the
      mitigation easy to explain.

    Objective used (demo):
    - score = -abs(DI - 1.0) + 0.15 * accuracy
        - the first term pulls the disparate impact ratio toward 1.0
        - the second term gives a mild reward for accuracy so that fairness
          is not "fixed" by making the model useless

    Search details:
    - Every ordered pair from `grid` is tried (t0 in the outer loop).
    - Pairs for which DI is undefined (None) are skipped.
    - On ties the first pair encountered is kept.

    Output:
    - dict with the winning thresholds and their metrics:
        {"t0": ..., "t1": ..., "di": ..., "acc": ..., "score": ...}
    - if no pair yields a defined DI, a fallback with t0 = t1 = 0.5,
      di = NaN, acc = 0.0 and score = -inf is returned
    """
    winner: Optional[Dict[str, float]] = None
    threshold_pairs = ((float(a), float(b)) for a in grid for b in grid)

    for t0, t1 in threshold_pairs:
        pred = apply_group_thresholds(proba, sensitive, t0, t1)
        di, _ = disparate_impact(pred, sensitive)
        if di is None:
            continue

        acc = float(accuracy_score(y_true, pred))
        # Demo objective: closeness of DI to 1.0 dominates, accuracy breaks it up.
        score = -abs(di - 1.0) + 0.15 * acc
        candidate = {"t0": t0, "t1": t1, "di": float(di), "acc": acc, "score": float(score)}

        if winner is None or candidate["score"] > winner["score"]:
            winner = candidate

    if winner:
        return winner
    return {"t0": 0.5, "t1": 0.5, "di": float("nan"), "acc": 0.0, "score": float("-inf")}


# ======================================================================================
# Main workflow
# ======================================================================================

def _fmt_console_metric(value: Optional[float]) -> str:
    """Format a metric for the console: three decimals, or 'n/a' when it is None."""
    return "n/a" if value is None else f"{value:.3f}"


def main() -> None:
    """
    Orchestrates the end-to-end PTAF workflow.

    Major steps:
    1) Generate synthetic data
    2) Apply privacy protections and export privacy artifacts
    3) Train an interpretable model and export transparency artifacts
    4) Log accountability artifacts (audit log + fingerprints + HITL policy)
    5) Evaluate fairness and export baseline + mitigated reports

    Command-line options: --n, --seed, --threshold, --epsilon, --k
    (see the parser below for defaults). Unknown arguments are ignored.

    Side effects:
    - Writes all artifacts under ./artifacts (relative to the working directory)
    - Prints a summary to stdout
    """
    parser = argparse.ArgumentParser(description="AI Ethics in Action (PTAF) demo.")
    parser.add_argument("--n", type=int, default=4000, help="Number of rows to generate.")
    parser.add_argument("--seed", type=int, default=42, help="Random seed.")
    parser.add_argument("--threshold", type=float, default=0.50, help="Decision threshold for REVIEW.")
    parser.add_argument("--epsilon", type=float, default=0.8, help="DP-like epsilon for aggregate demos.")
    parser.add_argument("--k", type=int, default=15, help="k for k-anonymity style generalization check.")
    # parse_known_args tolerates extra arguments instead of exiting
    # (presumably so the script also runs inside a notebook kernel).
    args, _ = parser.parse_known_args()

    artifacts = Path("artifacts")
    ensure_dir(artifacts)

    # -------------------------
    # 1) Data creation
    # -------------------------
    df_raw = make_synthetic_hiring_data(n=args.n, seed=args.seed)
    # NOTE(review): df_raw is exported unmodified. The module docstring says the
    # synthetic data includes direct identifiers, so treat this file as the
    # "before" picture and do not export the equivalent for real data.
    df_raw.to_csv(artifacts / "raw_data.csv", index=False)

    # -------------------------
    # 2) Privacy protections
    # -------------------------
    # Demo salt derived from the seed so runs are reproducible.
    # NOTE(review): a predictable salt is not acceptable outside a demo.
    salt = f"PTAF_DEMO_SALT_{args.seed}"
    df_priv = pseudonymize(df_raw, salt=salt)
    df_priv.to_csv(artifacts / "pseudonymized_data.csv", index=False)

    df_gen, group_sizes = generalize_quasi_identifiers(df_priv, k=args.k)
    df_gen.to_csv(artifacts / "generalized_data.csv", index=False)
    group_sizes.to_csv(artifacts / "k_anonymity_group_sizes.csv", index=False)

    # DP-like noisy aggregates (demo): mean skill by group
    dp_report = []
    for g in [0, 1]:
        # The mask is built from df_priv but applied to df_raw; this assumes
        # pseudonymize() preserves row order and length -- TODO confirm.
        mask = df_priv["group"].values == g
        real_mean = float(df_raw.loc[mask, "skill_score"].mean())
        dp_mean = dp_like_mean(
            values=df_raw.loc[mask, "skill_score"].values,
            epsilon=args.epsilon,
            value_range=(0.0, 100.0),
            seed=args.seed + 100 + g,  # distinct noise seed per group
        )
        dp_report.append({"group": g, "real_mean_skill": real_mean, "dp_like_mean_skill": dp_mean})

    write_json(
        artifacts / "dp_like_aggregates.json",
        {"created_at": now_iso(), "epsilon": args.epsilon, "items": dp_report},
    )

    # -------------------------
    # 3) Train interpretable model
    # -------------------------
    # Target label
    y = df_priv["recommended_for_review"].values.astype(int)

    # Sensitive attribute used ONLY for fairness evaluation
    sensitive = df_priv["group"].values.astype(int)

    # Feature set excludes:
    # - pseudo_id: identifier-like (not predictive, and increases linkage risk)
    # - group: sensitive attribute (excluded to show fairness issues can still appear via proxies)
    X = df_priv.drop(columns=["recommended_for_review", "pseudo_id", "group"])

    cat_cols = ["education"]
    num_cols = [c for c in X.columns if c not in cat_cols]

    # The sensitive attribute is split alongside X and y so it stays row-aligned
    # with the test set; stratifying on y keeps the label balance in both splits.
    X_train, X_test, y_train, y_test, s_train, s_test = train_test_split(
        X, y, sensitive, test_size=0.25, random_state=args.seed, stratify=y
    )

    pipe = build_pipeline(cat_cols, num_cols)
    pipe.fit(X_train, y_train)

    proba = pipe.predict_proba(X_test)[:, 1]
    pred = (proba >= args.threshold).astype(int)

    metrics = {
        "accuracy": float(accuracy_score(y_test, pred)),
        "precision": float(precision_score(y_test, pred)),
        "recall": float(recall_score(y_test, pred)),
        "threshold": float(args.threshold),
    }
    write_json(artifacts / "metrics.json", {"created_at": now_iso(), "metrics": metrics})

    # Transparency artifacts
    coef_tbl = coefficient_table(pipe, cat_cols, num_cols)
    coef_tbl.to_csv(artifacts / "coefficients.csv", index=False)

    explanations = []
    # Up to 12 evenly spaced *positions* in the test set (used with .iloc below).
    sample_indices = np.linspace(0, len(X_test) - 1, num=min(12, len(X_test)), dtype=int)
    for idx in sample_indices:
        row = X_test.iloc[[idx]]  # double brackets keep a one-row DataFrame
        p = float(pipe.predict_proba(row)[0, 1])
        reasons_tbl = reason_codes(pipe, row, cat_cols, num_cols, top_k=5)
        explanations.append(
            {
                "row_index": int(idx),
                "probability": p,
                "decision": "REVIEW" if p >= args.threshold else "NO_REVIEW",
                "reason_codes": reasons_tbl.to_dict(orient="records"),
                "input": row.to_dict(orient="records")[0],
            }
        )
    write_json(artifacts / "sample_explanations.json", {"created_at": now_iso(), "items": explanations})

    model_card = make_model_card(
        model_name="HiringScreen-PTAF-v1 (LogisticRegression)",
        intended_use="Assist with screening by recommending candidates for human review (not an automated hiring decision).",
        limitations=[
            "Synthetic dataset used for demonstration; not representative of real labor markets.",
            "Model behavior depends on feature definitions; requires domain review before any real use.",
            "Interpretability is limited to linear contributions; does not capture complex interactions.",
        ],
        data_notes="Trained on education, experience_years, skill_score; direct identifiers removed; sensitive attribute used for evaluation only.",
        metrics=metrics,
        ethical_notes={
            "privacy": "Direct identifiers removed; pseudonymous IDs used; quasi-identifiers generalized for sharing.",
            "transparency": "Coefficients and per-decision reason codes exported; model card included.",
            "accountability": "Audit logs + dataset/model fingerprints; HITL escalation policy demonstrated.",
            "fairness": "Subgroup metrics, disparate impact, and equal opportunity reported; threshold mitigation demonstrated.",
        },
    )
    write_json(artifacts / "model_card.json", model_card)

    # -------------------------
    # 4) Accountability artifacts
    # -------------------------
    auditor = Auditor(audit_path=artifacts / "audit_log.jsonl")

    # Fingerprint the exact training table (features + label). The index is
    # reset so the features line up with the freshly indexed label Series.
    train_fprint = df_fingerprint(
        pd.concat([X_train.reset_index(drop=True), pd.Series(y_train, name="label")], axis=1)
    )
    model_fprint = pipeline_fingerprint(pipe)

    auditor.log(
        "model_registry",
        {
            "model_name": model_card["model_name"],
            "dataset_fingerprint": train_fprint,
            "model_fingerprint": model_fprint,
            "metrics": metrics,
        },
    )

    # Log a few audited predictions + HITL decisions
    for idx in sample_indices[:6]:
        row = X_test.iloc[[idx]]
        out = predict_with_audit(
            auditor=auditor,
            pipe=pipe,
            row_df=row,
            cat_cols=cat_cols,
            num_cols=num_cols,
            reviewer="demo_user",
            threshold=args.threshold,
        )
        auditor.log(
            "hitl_policy",
            {
                "row_index": int(idx),
                "probability": out["probability"],
                "action": hitl_policy(out["probability"]),
            },
        )

    auditor.flush()

    # -------------------------
    # 5) Fairness evaluation + mitigation demo
    # -------------------------
    base_subgroups = subgroup_report(y_test, pred, s_test)
    base_subgroups.to_csv(artifacts / "fairness_subgroup_metrics_baseline.csv", index=False)

    di, sel_rates = disparate_impact(pred, s_test)
    eod, tpr = equal_opportunity_diff(y_test, pred, s_test)

    fairness_summary = {
        "created_at": now_iso(),
        "baseline": {
            "threshold": float(args.threshold),
            "selection_rates": sel_rates,
            "disparate_impact_ratio_g1_over_g0": di,
            "tpr_by_group": tpr,
            "equal_opportunity_diff_g1_minus_g0": eod,
        },
    }

    # Mitigation: group thresholds (demo). In real systems this needs governance review.
    # NOTE(review): the thresholds are tuned and then reported on the same test
    # split, so the mitigated figures are optimistic; a real evaluation should
    # tune on a separate validation split.
    grid = np.linspace(0.35, 0.65, 13)
    best = search_thresholds(proba, y_test, s_test, grid=grid)

    pred_m = apply_group_thresholds(proba, s_test, best["t0"], best["t1"])
    mitigated_subgroups = subgroup_report(y_test, pred_m, s_test)
    mitigated_subgroups.to_csv(artifacts / "fairness_subgroup_metrics_mitigated.csv", index=False)

    di2, sel_rates2 = disparate_impact(pred_m, s_test)
    eod2, tpr2 = equal_opportunity_diff(y_test, pred_m, s_test)

    fairness_summary["mitigated"] = {
        "thresholds": {"group0": best["t0"], "group1": best["t1"]},
        "accuracy": float(accuracy_score(y_test, pred_m)),
        "selection_rates": sel_rates2,
        "disparate_impact_ratio_g1_over_g0": di2,
        "tpr_by_group": tpr2,
        "equal_opportunity_diff_g1_minus_g0": eod2,
        "search_objective": {"score": best["score"], "baseline_acc": metrics["accuracy"]},
        "note": "Mitigation shown for learning; real deployment requires legal/ethical review and stakeholder approval.",
    }
    write_json(artifacts / "fairness_summary.json", fairness_summary)

    # -------------------------
    # Console summary
    # -------------------------
    print("\n=== PTAF Demo Complete ===")
    print(f"Artifacts written to: {artifacts.resolve()}")
    print("\nKey files:")
    for name in [
        "model_card.json",
        "coefficients.csv",
        "sample_explanations.json",
        "audit_log.jsonl",
        "fairness_summary.json",
        "fairness_subgroup_metrics_baseline.csv",
        "fairness_subgroup_metrics_mitigated.csv",
        "dp_like_aggregates.json",
        "k_anonymity_group_sizes.csv",
    ]:
        p = artifacts / name
        if p.exists():
            print(f" - {name}")

    # Fairness figures go through _fmt_console_metric so they print with the
    # same three-decimal precision as the headline metrics and a missing
    # (None) value prints "n/a" instead of raising inside the f-string.
    print("\nHeadline metrics:")
    print(f" - Accuracy:  {metrics['accuracy']:.3f}")
    print(f" - Precision: {metrics['precision']:.3f}")
    print(f" - Recall:    {metrics['recall']:.3f}")
    print("\nFairness (baseline):")
    print(f" - Disparate Impact (g1/g0): {_fmt_console_metric(di)}")
    print(f" - Equal Opportunity Diff:   {_fmt_console_metric(eod)}")
    print("\nFairness (mitigated demo):")
    print(f" - Thresholds: group0={best['t0']:.2f}, group1={best['t1']:.2f}")
    print(f" - Disparate Impact (g1/g0): {_fmt_console_metric(di2)}")
    print(f" - Equal Opportunity Diff:   {_fmt_console_metric(eod2)}")
    print("")


# Run the demo only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()



=== PTAF Demo Complete ===
Artifacts written to: D:\Scripts\Python Scripts\Responsible AI\artifacts

Key files:
 - model_card.json
 - coefficients.csv
 - sample_explanations.json
 - audit_log.jsonl
 - fairness_summary.json
 - fairness_subgroup_metrics_baseline.csv
 - fairness_subgroup_metrics_mitigated.csv
 - dp_like_aggregates.json
 - k_anonymity_group_sizes.csv

Headline metrics:
 - Accuracy:  0.620
 - Precision: 0.455
 - Recall:    0.180

Fairness (baseline):
 - Disparate Impact (g1/g0): 1.0408850408850407
 - Equal Opportunity Diff:   0.025

Fairness (mitigated demo):
 - Thresholds: group0=0.45, group1=0.45
 - Disparate Impact (g1/g0): 1.0027314408350405
 - Equal Opportunity Diff:   0.024

