# In [48]:
# ============================================================
# Mini ML Demo: "AI Ethics Classifier"
# ============================================================
# Goal:
# 1) Predict AI ethics principle:
#       - Privacy, Transparency, Accountability, Fairness
# 2) Predict lifecycle stage:
#       - Data, Development, Deployment
#
# This script is intentionally:
# - small, interpretable, expandable
# - interactive (quiz + testing)
#
# Dependencies:
#   pip install scikit-learn
# ============================================================

# =========================
# SECTION 1 — IMPORTS + DATA
# =========================
import random
from dataclasses import dataclass
from typing import List, Tuple

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import classification_report, confusion_matrix


# ------------------------------------------------------------
# 1A) Seed labeled training dataset
# ------------------------------------------------------------
# We create 50 examples per ethics principle (200 total).
# Each example also has a lifecycle label: Data / Development / Deployment.
#
# NOTE:
# - These are synthetic but realistic policy/risk/safeguard statements.
# - You can later add your own domain-specific statements to improve accuracy.
# ------------------------------------------------------------

# Type alias for one labeled training example.
SeedExample = Tuple[str, str, str]  # (text, ethics_label, lifecycle_label)

# Accumulator for every labeled example; each principle section below
# extends it with its own 50 examples.
seed_examples: List[SeedExample] = []

# --- PRIVACY (50) ---
# Three pools of Privacy statements, one per lifecycle stage. They hold
# 17, 16 and 17 statements respectively (50 in total).

# Data stage (17 statements).
privacy_data = [
    "Collect only the minimum personal data required for the task.",
    "Obtain explicit consent before collecting biometric information.",
    "Use data minimization when building user profiles for personalization.",
    "Provide clear opt-in choices for collecting location data.",
    "Remove direct identifiers during preprocessing to reduce re-identification risk.",
    "Anonymize or pseudonymize records before using them in training datasets.",
    "Avoid collecting sensitive attributes unless strictly necessary and justified.",
    "Define a data retention limit and delete records after the purpose is met.",
    "Use secure transfer when ingesting customer data from third parties.",
    "Document the lawful basis for collecting personal data in the data pipeline.",
    "Mask email addresses and phone numbers during dataset preparation.",
    "Use differential privacy where feasible to protect individual contributions.",
    "Ensure scraping respects consent signals and site policies to avoid privacy harm.",
    "Do not combine datasets in ways that could enable identity linkage.",
    "Strip metadata from uploaded files to prevent accidental data leakage.",
    "Require signed data processing agreements before sharing user data internally.",
    "Encrypt data at rest during collection and preprocessing steps.",
]
# Development stage (16 statements).
privacy_dev = [
    "Use privacy-preserving training techniques when handling sensitive data.",
    "Prevent training on raw chat logs unless consent and safeguards are in place.",
    "Run privacy risk assessments before training on user-generated content.",
    "Test the model for memorization of personal information during evaluation.",
    "Block model outputs that could reveal private identifiers from training data.",
    "Use secure sandbox environments for model development with restricted access.",
    "Apply access controls so only authorized staff can view sensitive datasets.",
    "Audit prompts and logs used in fine-tuning to avoid exposing PII.",
    "Redact sensitive text fields before model training begins.",
    "Use k-anonymity style checks when preparing training splits.",
    "Track dataset provenance to ensure personal data was collected appropriately.",
    "Avoid using shadow datasets that were gathered without user awareness.",
    "Validate that de-identification survives joins and feature engineering steps.",
    "Train with federated learning when raw data cannot leave user devices.",
    "Ensure evaluation datasets do not contain leaked customer identifiers.",
    "Run membership inference tests to estimate privacy leakage risk.",
]
# Deployment stage (17 statements).
privacy_deploy = [
    "Provide a right-to-delete workflow so users can request data removal.",
    "Allow users to opt out of data collection without losing core service access.",
    "Enable users to download and review the personal data the system stores.",
    "Log access to user data and review logs for suspicious usage patterns.",
    "Rotate encryption keys and enforce secure storage for deployed logs.",
    "Avoid storing raw prompts if they may contain personal information.",
    "Implement automatic redaction of PII in monitoring and analytics.",
    "Use strict retention policies for inference logs and delete them regularly.",
    "Offer a privacy notice describing how inference data is used and stored.",
    "Provide controls to disable personalized tracking in production.",
    "Ensure incident response includes notification procedures for data breaches.",
    "Restrict third-party telemetry to prevent unapproved data sharing.",
    "Use on-device inference for highly sensitive use cases when feasible.",
    "Provide a privacy contact channel for user requests and complaints.",
    "Periodically review deployed data flows for scope creep and overcollection.",
    "Ensure backups also respect deletion requests and retention limits.",
    "Monitor for prompt injection attempts that try to extract private data.",
]

# Tag each Privacy statement with its lifecycle stage and add the result to
# the shared seed list. The slices cap the pools at 17 + 16 + 17 = 50.
privacy_examples = (
    [(statement, "Privacy", "Data") for statement in privacy_data[:17]]
    + [(statement, "Privacy", "Development") for statement in privacy_dev[:16]]
    + [(statement, "Privacy", "Deployment") for statement in privacy_deploy[:17]]
)
seed_examples += privacy_examples

# --- TRANSPARENCY (50) ---
# Three pools of Transparency statements, one per lifecycle stage. They hold
# 17, 16 and 17 statements respectively (50 in total).

# Data stage (17 statements).
trans_data = [
    "Explain what data is collected and why in plain language.",
    "Disclose whether user data will be used for training or analytics.",
    "Provide a data dictionary describing each feature used by the model.",
    "Clearly label sensitive data fields and how they are processed.",
    "Publish the data sources used to build the training dataset.",
    "Document how missing values are handled during preprocessing.",
    "State the retention period and the purpose for each collected data type.",
    "Provide users with a clear consent screen rather than hidden defaults.",
    "Disclose when data is obtained from brokers or third-party providers.",
    "Explain how data is sampled so users understand representativeness limits.",
    "Describe what data is excluded and why, including filtering rules.",
    "Disclose whether synthetic data is used and how it is generated.",
    "Provide a notice about web scraping and the scope of collected content.",
    "Explain the impact of normalization, tokenization, and cleaning steps.",
    "Maintain a changelog for dataset versions used in the project.",
    "Describe how labels were created and the risk of labeling bias.",
    "Publish known data limitations and gaps that affect performance.",
]
# Development stage (16 statements).
trans_dev = [
    "Use model cards to document intended use and limitations.",
    "Provide system documentation describing architecture and training setup.",
    "Explain key evaluation metrics and why they were selected.",
    "Disclose the main factors influencing high-impact predictions.",
    "Provide interpretable explanations for model decisions where feasible.",
    "Document hyperparameters and training configuration for reproducibility.",
    "Report subgroup performance results to reveal uneven outcomes.",
    "Explain why a particular algorithm was chosen over alternatives.",
    "Provide calibration plots or reliability summaries for risk scores.",
    "Describe the data split method and how leakage was prevented.",
    "Write clear release notes for each model version.",
    "Document known failure modes discovered during testing.",
    "Provide transparency on human review involvement in evaluation.",
    "Explain the role of heuristics, rules, or post-processing layers.",
    "Publish a summary of red-teaming findings and mitigations applied.",
    "Disclose when performance degrades outside the training distribution.",
]
# Deployment stage (17 statements).
trans_deploy = [
    "Provide explanations for high-impact decisions like loans or hiring.",
    "Tell users when they are interacting with an AI system.",
    "Disclose confidence or uncertainty signals for model outputs.",
    "Provide an appeal process when the system makes consequential decisions.",
    "Explain what monitoring is performed and what triggers interventions.",
    "Clearly state if user interactions are logged for improvement purposes.",
    "Provide a user-facing help page describing how the AI works at a high level.",
    "Display reasons codes for automated approvals or denials when appropriate.",
    "Disclose when a human is in the loop versus fully automated decisions.",
    "Publish an uptime and incident report for critical AI services.",
    "Provide a feedback button so users can report incorrect outputs.",
    "Notify users when the model version changes in meaningful ways.",
    "Explain content moderation actions with a clear policy reference.",
    "Provide transparency reports about enforcement and error rates.",
    "Explain why certain content was generated or refused by the model.",
    "Disclose how ranking or recommendation criteria are determined.",
    "Provide an audit-friendly log of decisions for regulators and users.",
]

# Tag each Transparency statement with its lifecycle stage and add the result
# to the shared seed list. The slices cap the pools at 17 + 16 + 17 = 50.
trans_examples = (
    [(statement, "Transparency", "Data") for statement in trans_data[:17]]
    + [(statement, "Transparency", "Development") for statement in trans_dev[:16]]
    + [(statement, "Transparency", "Deployment") for statement in trans_deploy[:17]]
)
seed_examples += trans_examples

# --- ACCOUNTABILITY (50) ---
# Three pools of Accountability statements, one per lifecycle stage. They hold
# 17, 16 and 17 statements respectively (50 in total).

# Data stage (17 statements).
acct_data = [
    "Assign an owner responsible for approving data sources and usage rights.",
    "Require governance review before ingesting new third-party datasets.",
    "Maintain audit trails for data access and dataset modifications.",
    "Document who approved collection of sensitive attributes and why.",
    "Use approval workflows for adding new data fields to production pipelines.",
    "Ensure data vendors provide compliance documentation and attestations.",
    "Record dataset lineage so issues can be traced back to origin.",
    "Define escalation paths when data quality issues are detected.",
    "Use access reviews to ensure only authorized staff can pull data exports.",
    "Log data transformations so errors can be reproduced and fixed.",
    "Assign responsibility for data retention and deletion policy enforcement.",
    "Require sign-off when combining datasets that increase risk.",
    "Create accountability checklists for data collection and consent.",
    "Establish clear roles for data stewards and data custodians.",
    "Perform periodic audits of dataset compliance with internal policies.",
    "Document legal review outcomes for data acquisition.",
    "Require incident reporting when data misuse is suspected.",
]
# Development stage (16 statements).
acct_dev = [
    "Assign a named model owner and keep audit trails for training runs.",
    "Use go/no-go release gates and require documented approvals.",
    "Run red teaming before release and record mitigation decisions.",
    "Track evaluation results and who signed off on deployment readiness.",
    "Maintain reproducible training pipelines so issues can be investigated.",
    "Require peer review for model changes that affect high-impact outcomes.",
    "Use checklists for risk assessment before promoting a model version.",
    "Define responsibility for model performance targets and safety metrics.",
    "Keep a record of training data versions and code commit hashes.",
    "Document who can override safeguards and under what conditions.",
    "Ensure a security review is completed before model launch.",
    "Set up accountability for responding to discovered bias or harm.",
    "Require internal audits for compliance with model governance policy.",
    "Log experiments and rationale for major architectural decisions.",
    "Establish model review boards for sensitive use cases.",
    "Ensure evaluation includes adversarial testing and is documented.",
]
# Deployment stage (17 statements).
acct_deploy = [
    "Have incident response playbooks for AI harm and escalation procedures.",
    "Provide channels for users to report harm and track resolution timelines.",
    "Monitor the system in production and assign on-call responsibility.",
    "Define rollback procedures when deployed performance degrades.",
    "Keep logs sufficient for audits while respecting privacy constraints.",
    "Investigate complaints and document corrective actions taken.",
    "Perform post-incident reviews after failures and update safeguards.",
    "Ensure there is a clear appeals process with accountable decision makers.",
    "Define SLAs for responding to high-severity model issues.",
    "Use monitoring dashboards with ownership for each key metric.",
    "Document how human reviewers are trained and held accountable.",
    "Record decisions made during crisis management for later audit.",
    "Establish responsibility for vendor-managed AI components in production.",
    "Require periodic compliance reviews for deployed AI systems.",
    "Ensure leadership sign-off for continuing operation after major incidents.",
    "Maintain a public-facing contact for regulatory inquiries when required.",
    "Track and report recurring failure patterns and mitigation effectiveness.",
]

# Tag each Accountability statement with its lifecycle stage and add the
# result to the shared seed list. The slices cap the pools at 17 + 16 + 17 = 50.
acct_examples = (
    [(statement, "Accountability", "Data") for statement in acct_data[:17]]
    + [(statement, "Accountability", "Development") for statement in acct_dev[:16]]
    + [(statement, "Accountability", "Deployment") for statement in acct_deploy[:17]]
)
seed_examples += acct_examples

# --- FAIRNESS (50) ---
# Three pools of Fairness statements, one per lifecycle stage. They hold
# 17, 16 and 17 statements respectively (50 in total).

# Data stage (17 statements).
fair_data = [
    "Ensure representative data coverage across demographic groups.",
    "Check that data collection does not systematically exclude minorities.",
    "Balance the dataset to avoid underrepresentation of protected groups.",
    "Audit labels for bias introduced by annotator assumptions.",
    "Measure sampling bias when data comes from a single region or platform.",
    "Avoid proxies for protected attributes unless justified and controlled.",
    "Document demographic distribution and gaps in the training data.",
    "Collect data from diverse sources to reduce skewed representation.",
    "Validate that translation or transcription errors do not target certain accents.",
    "Ensure accessibility data includes users with disabilities.",
    "Review whether historical data encodes discriminatory outcomes.",
    "Avoid using biased crime data without accounting for policing disparities.",
    "Ensure images include diverse skin tones and lighting conditions.",
    "Check that data cleaning does not remove dialects disproportionately.",
    "Assess whether consent processes exclude low-literacy populations.",
    "Use stratified sampling so minority classes are not ignored.",
    "Evaluate label consistency across annotators for sensitive categories.",
]
# Development stage (16 statements).
fair_dev = [
    "Evaluate performance by subgroup, not just overall accuracy.",
    "Use fairness metrics like equalized odds or demographic parity where relevant.",
    "Test for disparate impact in model outputs across protected groups.",
    "Run bias audits on embeddings and representation learning components.",
    "Perform counterfactual fairness checks where feasible.",
    "Use reweighting or resampling to reduce training bias.",
    "Validate that thresholds do not unfairly penalize one group.",
    "Check calibration across groups, not only average calibration.",
    "Include fairness constraints when optimizing high-impact models.",
    "Use diverse evaluation sets to detect fairness problems early.",
    "Perform error analysis specifically on historically marginalized groups.",
    "Document trade-offs between accuracy and fairness metrics.",
    "Avoid using a single global threshold if it harms certain subgroups.",
    "Assess whether human review introduces biased overrides.",
    "Test robustness for different dialects, accents, and writing styles.",
    "Validate that the model does not amplify stereotypes in generation.",
]
# Deployment stage (17 statements).
fair_deploy = [
    "Monitor fairness drift over time after deployment.",
    "Track subgroup outcomes and alert when disparities increase.",
    "Provide a mechanism for impacted users to contest unfair decisions.",
    "Recalibrate or retrain when group performance gaps widen.",
    "Run periodic fairness audits on deployed models and publish summaries.",
    "Use human oversight for high-risk decisions affecting vulnerable groups.",
    "Monitor feedback for signals of discriminatory behavior in production.",
    "Ensure content moderation does not silence certain communities unfairly.",
    "Evaluate recommendation exposure so minority creators are not suppressed.",
    "Conduct A/B tests with fairness constraints to prevent harmful rollouts.",
    "Provide accessible support channels for users experiencing discrimination.",
    "Check post-deployment thresholds and policy rules for disparate impact.",
    "Use drift detection to identify distribution shifts affecting subgroups.",
    "Maintain fairness KPIs and assign owners for remediation actions.",
    "Ensure appeals are reviewed consistently across user groups.",
    "Audit vendor components for fairness before integrating into production.",
    "Monitor language model toxicity differentially across identity mentions.",
]

# Tag each Fairness statement with its lifecycle stage and add the result to
# the shared seed list. The slices cap the pools at 17 + 16 + 17 = 50.
fair_examples = (
    [(statement, "Fairness", "Data") for statement in fair_data[:17]]
    + [(statement, "Fairness", "Development") for statement in fair_dev[:16]]
    + [(statement, "Fairness", "Deployment") for statement in fair_deploy[:17]]
)
seed_examples += fair_examples

# Safety check: the four principle sections should have contributed 50
# examples each (4 x 50 = 200). NOTE: this is an `assert`, so it is skipped
# when Python runs with -O.
assert len(seed_examples) == 200, f"Expected 200 examples, got {len(seed_examples)}"

# In [49]:
# ==============================
# SECTION 2 — INTERACTIVE QUIZ
# ==============================
# Purpose:
# - Before training, help the user understand what each principle means.
# - The quiz is multiple-choice + immediate feedback.
#
# How it works:
# - User answers 12 questions.
# - We show score + explanations.
# - Then we ask if they want to proceed to training.
# ==============================

@dataclass
class QuizQ:
    """One multiple-choice quiz question with its answer key and explanation."""

    # Question text shown to the user.
    question: str
    # Option strings, each already prefixed with its letter, e.g. "A) Fairness".
    options: List[str]        # A, B, C, D
    # Letter of the correct option; compared against the user's upper-cased input.
    answer: str              # "A"/"B"/"C"/"D"
    # Shown after every answer, whether it was right or wrong.
    explanation: str

# The 12 quiz questions, asked in this order by run_quiz().
# Questions 1-4 and 8-12 are about the ethics principles;
# questions 5-7 are about the lifecycle stages.
QUIZ: List[QuizQ] = [
    # --- Ethics principles (Q1-Q4) ---
    QuizQ(
        question="Which principle is MOST about limiting collection and giving users control over personal data?",
        options=["A) Fairness", "B) Privacy", "C) Transparency", "D) Accountability"],
        answer="B",
        explanation="Privacy focuses on consent, minimization, deletion, and preventing exposure of personal data."
    ),
    QuizQ(
        question="A model denies a loan. The user asks: 'Why?' Which principle is MOST relevant?",
        options=["A) Transparency", "B) Privacy", "C) Fairness", "D) Accountability"],
        answer="A",
        explanation="Transparency includes explainability and meaningful reasons for high-impact decisions."
    ),
    QuizQ(
        question="Who is responsible if the deployed model causes harm and must respond with fixes and documentation?",
        options=["A) Transparency", "B) Accountability", "C) Privacy", "D) Fairness"],
        answer="B",
        explanation="Accountability is about ownership, audit trails, incident response, and corrective action."
    ),
    QuizQ(
        question="The model performs well overall but poorly for one demographic group. Which principle is violated?",
        options=["A) Accountability", "B) Privacy", "C) Fairness", "D) Transparency"],
        answer="C",
        explanation="Fairness addresses subgroup performance, disparate impact, and equitable outcomes."
    ),
    # --- Lifecycle stages (Q5-Q7); option D is a distractor that is not one of the three stages ---
    QuizQ(
        question="Which lifecycle stage MOST relates to consent screens, collection limits, and preprocessing steps?",
        options=["A) Deployment", "B) Development", "C) Data", "D) Monitoring"],
        answer="C",
        explanation="Data stage includes collection + preprocessing (cleaning, de-identification, retention)."
    ),
    QuizQ(
        question="Which lifecycle stage MOST relates to training, hyperparameters, and evaluation?",
        options=["A) Development", "B) Data", "C) Deployment", "D) Governance"],
        answer="A",
        explanation="Development includes training + evaluation, and model documentation like model cards."
    ),
    QuizQ(
        question="Which lifecycle stage MOST relates to monitoring drift, incident response, and user appeals?",
        options=["A) Data", "B) Development", "C) Deployment", "D) Pretraining"],
        answer="C",
        explanation="Deployment includes rollout, monitoring, user feedback, and ongoing risk management."
    ),
    # --- Ethics principles applied to concrete practices (Q8-Q12) ---
    QuizQ(
        question="Publishing a model card describing limitations and intended use is mainly about:",
        options=["A) Privacy", "B) Transparency", "C) Fairness", "D) Accountability"],
        answer="B",
        explanation="Model cards are a transparency practice: clear documentation and disclosure."
    ),
    QuizQ(
        question="Running red-teaming and requiring go/no-go approval before release is mainly about:",
        options=["A) Accountability", "B) Privacy", "C) Transparency", "D) Fairness"],
        answer="A",
        explanation="Accountability includes governance, gates, audits, and responsibility for safe releases."
    ),
    QuizQ(
        question="Removing personal identifiers from datasets is mainly about:",
        options=["A) Transparency", "B) Privacy", "C) Accountability", "D) Fairness"],
        answer="B",
        explanation="De-identification is a core privacy safeguard to reduce exposure and re-identification risk."
    ),
    QuizQ(
        question="Tracking subgroup outcomes after deployment to detect disparities is mainly about:",
        options=["A) Fairness", "B) Transparency", "C) Privacy", "D) Accountability"],
        answer="A",
        explanation="This is fairness monitoring—detecting and correcting unequal outcomes over time."
    ),
    QuizQ(
        question="Keeping audit trails of training runs and approvals is mainly about:",
        options=["A) Transparency", "B) Fairness", "C) Accountability", "D) Privacy"],
        answer="C",
        explanation="Audit trails and ownership are accountability practices for traceability and remediation."
    ),
]

def run_quiz() -> bool:
    """
    Run the multiple-choice quiz on the console and ask whether to continue.

    Every question in QUIZ is printed with its options. The user is
    re-prompted until they type A, B, C, or D (case-insensitive), then sees
    whether they were right plus the explanation. After the last question
    the score is printed, followed by a tip when the score is below 75%.

    Returns:
    -------
    bool
        True if the user answers 'yes' to "Proceed to model training?",
        False if they answer 'no'. Any other reply is re-prompted.
    """
    # Derive the question count from QUIZ so the header stays correct when
    # questions are added or removed.
    total = len(QUIZ)

    print("\n============================================================")
    print(f"QUIZ: AI Ethics Principles ({total} questions)")
    print("Answer by typing A, B, C, or D and pressing Enter.")
    print("============================================================\n")

    score = 0
    for i, q in enumerate(QUIZ, 1):
        print(f"Q{i}. {q.question}")
        for opt in q.options:
            print("   " + opt)

        # Keep asking until we get one of the four valid letters.
        while True:
            ans = input("Your answer (A/B/C/D): ").strip().upper()
            if ans in {"A", "B", "C", "D"}:
                break
            print("Please type only A, B, C, or D.")

        if ans == q.answer:
            score += 1
            print("✅ Correct!")
        else:
            print(f"❌ Not quite. Correct answer: {q.answer}")
        print("Explanation:", q.explanation)
        print("-" * 60)

    print(f"\nFinal Score: {score}/{total}")
    # Show the tip below 75% correct. Integer arithmetic avoids float
    # rounding; for the 12 built-in questions this is exactly "score < 9".
    if score * 4 < total * 3:
        print("Tip: Review the explanations above—then continue to training to see how labels map to text.")
    else:
        print("Nice — you’ve got the core concepts. Training will be more intuitive now.")

    while True:
        proceed = input("\nProceed to model training? (yes/no): ").strip().lower()
        if proceed in {"yes", "no"}:
            return proceed == "yes"
        print("Please type 'yes' or 'no'.")





# In [50]:
# ==========================================
# SECTION 3 — TRAINING (WITH CLEAR COMMENTS)
# ==========================================
# We train a simple, effective baseline:
# - TF-IDF turns text into numeric features
# - LinearSVC is strong for text classification
# - MultiOutputClassifier predicts BOTH labels:
#       ethics principle + lifecycle stage
#
# We also:
# - split into train/test
# - print metrics for both tasks
# ==========================================

def train_model(examples: List[SeedExample],
                test_size: float = 0.25,
                random_state: int = 42) -> Pipeline:
    """
    Train and evaluate a text classifier that predicts two labels per statement:
      1) AI ethics principle
      2) AI lifecycle stage

    Parameters:
    ----------
    examples : list of (text, ethics_label, lifecycle_label)
        The labeled data. The list itself is NOT modified; the function
        works on a shuffled copy.

    test_size : float
        Fraction of data to hold out for testing.
        0.25 = 25% test, 75% training.

    random_state : int
        Seed used for the shuffle, the train/test split, and LinearSVC,
        so calling the function again with the same arguments uses the
        same split.

    Returns:
    -------
    Pipeline
        The fitted pipeline (TF-IDF -> MultiOutputClassifier(LinearSVC)).
        Classification reports and confusion matrices for the held-out
        test set are printed as a side effect.

    Raises:
    ------
    ValueError
        If `examples` is empty.
    """
    if not examples:
        raise ValueError("train_model() needs at least one labeled example.")

    # ------------------------------------------------------------
    # STEP 1: SHUFFLE THE DATA
    # ------------------------------------------------------------
    # The seed dataset is built in blocks (Privacy, then Transparency,
    # then Accountability, then Fairness), so we mix the examples before
    # splitting.
    #
    # We shuffle a COPY. Shuffling the caller's list in place would
    # reorder the global seed list, and a second call with the same seed
    # would then start from a different order and produce a different split.
    shuffled = list(examples)
    random.Random(random_state).shuffle(shuffled)

    # ------------------------------------------------------------
    # STEP 2: SEPARATE INPUTS (X) AND OUTPUTS (y)
    # ------------------------------------------------------------
    # In supervised learning:
    #   X = inputs (features)  -> the text statements
    #   y = outputs (labels)   -> what we want to predict
    #
    # Each example is (text, ethics_label, lifecycle_label).
    texts = [text for (text, _, _) in shuffled]        # raw statements
    y_ethics = [ethics for (_, ethics, _) in shuffled]  # "Privacy", "Fairness", ...
    y_stage = [stage for (_, _, stage) in shuffled]     # "Data", "Development", "Deployment"

    # ------------------------------------------------------------
    # STEP 3: CREATE A MULTI-OUTPUT TARGET
    # ------------------------------------------------------------
    # We predict TWO things at once, so both labels are combined into a
    # single target where Y[i] = (ethics_label_i, lifecycle_label_i),
    # e.g. ("Privacy", "Deployment").
    Y = list(zip(y_ethics, y_stage))

    # ------------------------------------------------------------
    # STEP 4: TRAIN / TEST SPLIT
    # ------------------------------------------------------------
    # We hold out part of the data to measure performance on *unseen*
    # statements rather than on memorized training data.
    #
    # stratify=y_ethics keeps each ethics principle in roughly the same
    # proportion in both sets. Note that only the ethics label is
    # stratified; the lifecycle stage is not.
    X_train, X_test, Y_train, Y_test = train_test_split(
        texts,
        Y,
        test_size=test_size,
        random_state=random_state,
        stratify=y_ethics
    )

    # ------------------------------------------------------------
    # STEP 5: DEFINE THE CLASSIFIER
    # ------------------------------------------------------------
    # LinearSVC (Support Vector Classifier):
    # - Strong baseline for text classification
    # - Works well with high-dimensional sparse data (TF-IDF)
    # - Fast for small/medium datasets
    #
    # LinearSVC has no predict_proba(); it returns hard class predictions.
    # It is seeded so the fit is repeatable.
    base_clf = LinearSVC(random_state=random_state)

    # ------------------------------------------------------------
    # STEP 6: BUILD THE PIPELINE
    # ------------------------------------------------------------
    # A Pipeline chains steps so the same transformations are applied
    # during training and during prediction.
    #
    # (1) TF-IDF Vectorizer: converts raw text into numerical features.
    #     - TF  (Term Frequency): how often a term appears in a document
    #     - IDF (Inverse Document Frequency): downweights terms that
    #       appear in many documents
    #     - ngram_range=(1, 2): use single words ("retention") AND
    #       adjacent two-word phrases ("data retention"). Stop words are
    #       not removed by default, so bigrams are literal neighbours.
    #     - min_df=1: keep even rare terms (the dataset is small)
    #
    # (2) MultiOutputClassifier: fits one copy of the base classifier per
    #     target column, i.e. one LinearSVC for the ethics principle and
    #     one for the lifecycle stage. Same text -> two predictions.
    model = Pipeline([
        ("tfidf", TfidfVectorizer(
            ngram_range=(1, 2),
            min_df=1
        )),
        ("clf", MultiOutputClassifier(base_clf))
    ])

    # ----------------------------
    # TRAINING STEP
    # ----------------------------
    # Fit the pipeline on training data:
    # - Vectorizer learns vocabulary
    # - Classifier learns patterns between words/phrases and labels
    model.fit(X_train, Y_train)

    # ----------------------------
    # EVALUATION STEP
    # ----------------------------
    Y_pred = model.predict(X_test)

    # Split the two-column targets/predictions back into one sequence per task.
    ethics_true = [y[0] for y in Y_test]
    stage_true = [y[1] for y in Y_test]
    ethics_pred = [y[0] for y in Y_pred]
    stage_pred = [y[1] for y in Y_pred]

    print("\n============================================================")
    print("EVALUATION RESULTS (holdout test set)")
    print("============================================================\n")

    print("---- Ethics Principle Classification Report ----")
    print(classification_report(ethics_true, ethics_pred, digits=3))

    print("---- Lifecycle Stage Classification Report ----")
    print(classification_report(stage_true, stage_pred, digits=3))

    # Confusion matrices (compact view). Passing `labels` fixes the
    # row/column order to match the header printed just above each matrix.
    print("---- Confusion Matrix: Ethics (rows=true, cols=pred) ----")
    labels_ethics = ["Privacy", "Transparency", "Accountability", "Fairness"]
    print(labels_ethics)
    print(confusion_matrix(ethics_true, ethics_pred, labels=labels_ethics))

    print("\n---- Confusion Matrix: Lifecycle (rows=true, cols=pred) ----")
    labels_stage = ["Data", "Development", "Deployment"]
    print(labels_stage)
    print(confusion_matrix(stage_true, stage_pred, labels=labels_stage))

    return model



# In [51]:
# ======================================
# SECTION 4 — TESTING / TRY YOUR OWN TEXT
# ======================================
# After training:
# - Predict on custom statements interactively
# - Provide a small set of built-in test cases
# ======================================

def predict_statement(model, text: str):
    """Classify a single statement and return its two predicted labels.

    ``model.predict`` is called on a batch, so the statement is wrapped in a
    one-element list and only the first (and only) result row is used.

    Args:
        model: Any object exposing ``predict(list_of_texts)`` whose result
            rows hold the ethics principle at index 0 and the lifecycle
            stage at index 1.
        text: The statement to classify.

    Returns:
        A ``(ethics_label, stage_label)`` tuple taken from the first row.
    """
    batch = model.predict([text])
    first_row = batch[0]
    return first_row[0], first_row[1]

def run_builtin_tests(model):
    """Print predictions for a fixed set of hand-written sample statements.

    Each sample is classified with ``predict_statement`` and echoed together
    with its predicted principle and lifecycle stage. Nothing is returned and
    no expected labels are checked; this is a quick visual sanity pass.

    Args:
        model: Fitted model forwarded unchanged to ``predict_statement``.
    """
    rule = "============================================================"
    sample_statements = (
        "We should allow users to delete their data and stop collecting analytics logs.",
        "Provide a clear explanation when the system rejects a job application.",
        "Assign a model owner and create incident response procedures for harmful outputs.",
        "Evaluate the model across gender and region to reduce disparate impact.",
        "Document training data sources and known limitations in a model card.",
        "Monitor subgroup performance after launch and retrain when drift is detected.",
        "Remove identifiers before training and restrict access to sensitive datasets.",
        "Disclose when users are interacting with an AI chatbot and why it responded that way.",
    )

    # Section banner (leading blank line separates it from earlier output).
    print("\n" + rule)
    print("BUILT-IN TEST CASES")
    print(rule)

    for statement in sample_statements:
        # Predict first so a failure surfaces before any partial output.
        principle, stage = predict_statement(model, statement)
        print(f"\nText: {statement}")
        print(f"Predicted -> Principle: {principle} | Lifecycle: {stage}")

def interactive_testing_loop(model):
    """Read statements from stdin and print the model's predictions.

    The loop keeps prompting until the user types ``quit`` (any letter case),
    stdin reaches end-of-file, or the user interrupts the prompt. Inputs
    shorter than 10 characters (after stripping whitespace) are rejected with
    a hint and the prompt is shown again.

    Args:
        model: Fitted model forwarded unchanged to ``predict_statement``.

    Returns:
        None. All results are written to stdout.
    """
    print("\n============================================================")
    print("INTERACTIVE TESTING")
    print("Type a statement and press Enter to classify it.")
    print("Type 'quit' to stop.")
    print("============================================================\n")

    while True:
        try:
            text = input("Enter a statement (or 'quit'): ").strip()
        except (EOFError, KeyboardInterrupt):
            # stdin was closed/exhausted (piped input, Ctrl-D) or the user
            # interrupted the prompt (Ctrl-C / kernel interrupt). Treat both
            # like 'quit' so the caller can finish normally instead of dying
            # with a traceback. The bare print ends the dangling prompt line.
            print()
            break

        if text.lower() == "quit":
            break
        if len(text) < 10:
            # Very short inputs give the TF-IDF features almost nothing to
            # work with, so ask for more instead of printing a guess.
            print("Please enter a bit more detail (at least ~10 characters).")
            continue

        e, s = predict_statement(model, text)
        print(f"→ Predicted Principle: {e}")
        print(f"→ Predicted Lifecycle: {s}")
        print("-" * 60)


# =========================
# MAIN PROGRAM FLOW
# =========================
if __name__ == "__main__":
    # Summary of the seed set.
    # NOTE: the "50 each" line is a fixed description of the shipped seed
    # data; it is not recomputed from seed_examples.
    print(f"Seed labeled examples: {len(seed_examples)}")
    print("Principles: 50 each (Privacy, Transparency, Accountability, Fairness)\n")

    # Step 1 (currently disabled): gate training behind the quiz.
    # Uncomment to require the quiz to pass before the model is trained.
    # proceed = run_quiz()
    # if not proceed:
    #     print("\nOkay — stopping here. Re-run when you want to train the model.")
    #     raise SystemExit(0)

    # Step 2: fit the pipeline and print the holdout evaluation.
    # `model` is intentionally left at module level so later cells can reuse it.
    model = train_model(seed_examples, test_size=0.25, random_state=42)

    # Step 3: show predictions for the built-in sample statements.
    run_builtin_tests(model)

    # Step 4: let the user classify their own statements until they quit.
    interactive_testing_loop(model)

    print("\nDone. You can now expand seed_examples with your own labeled data for higher accuracy.")


Seed labeled examples: 200
Principles: 50 each (Privacy, Transparency, Accountability, Fairness)


EVALUATION RESULTS (holdout test set)

---- Ethics Principle Classification Report ----
                precision    recall  f1-score   support

Accountability      0.714     0.769     0.741        13
      Fairness      0.857     0.500     0.632        12
       Privacy      0.714     0.769     0.741        13
  Transparency      0.800     1.000     0.889        12

      accuracy                          0.760        50
     macro avg      0.771     0.760     0.750        50
  weighted avg      0.769     0.760     0.750        50

---- Lifecycle Stage Classification Report ----
              precision    recall  f1-score   support

        Data      0.667     0.842     0.744        19
  Deployment      0.688     0.579     0.629        19
 Development      0.500     0.417     0.455        12

    accuracy                          0.640        50
   macro avg      0.618     0.613     0.609        50

Enter a statement (or 'quit'):  Minorities did not get loans


→ Predicted Principle: Fairness
→ Predicted Lifecycle: Data
------------------------------------------------------------


Enter a statement (or 'quit'):  The user was not aware of what was being done with his data


→ Predicted Principle: Transparency
→ Predicted Lifecycle: Data
------------------------------------------------------------


Enter a statement (or 'quit'):  Personally Identifiable Information should be removed


→ Predicted Principle: Privacy
→ Predicted Lifecycle: Data
------------------------------------------------------------


Enter a statement (or 'quit'):  quit



Done. You can now expand seed_examples with your own labeled data for higher accuracy.
