In [0]:
# imports
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.base import BaseEstimator, ClassifierMixin, clone
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef, confusion_matrix
from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPClassifier
# from sklearn.tree import DecisionTreeClassifier

# helpers
def demographic_parity_gap(y_pred, A):
    '''
    Demographic parity gap between the groups A == 0 and A == 1.

    y_pred : array-like of predictions; the mean over a group is used as
             that group's positive-prediction rate (so 0/1 predictions are
             expected).
    A      : array-like sensitive attribute aligned with y_pred. Only the
             codes 0 and 1 are compared; samples with any other code are
             ignored.

    Returns (gap, p0, p1) where p0 / p1 are the mean predictions of the
    A == 0 / A == 1 groups and gap = |p1 - p0|. If a group has no samples
    its rate (and therefore the gap) is NaN.
    '''
    y_pred = np.asarray(y_pred)
    A = np.asarray(A)

    def _positive_rate(mask):
        # An empty group has no defined rate. Return NaN explicitly instead
        # of calling mean() on an empty slice, which yields the same NaN but
        # also emits RuntimeWarnings.
        if not mask.any():
            return float("nan")
        return float(y_pred[mask].mean())

    p0 = _positive_rate(A == 0)
    p1 = _positive_rate(A == 1)

    return abs(p1 - p0), p0, p1

def compute_metrics(y_true, y_pred, A):
    '''
    Gather classification and fairness metrics for one set of predictions.

    y_true : true labels
    y_pred : predicted labels
    A      : sensitive attribute aligned with y_pred

    Returns a dict with the keys accuracy, precision, recall, f1, mcc,
    dp_gap, dp_p0, dp_p1 and confusion_matrix (in that order).
    precision / recall / f1 are computed with zero_division=0.
    '''
    report = {"accuracy": accuracy_score(y_true, y_pred)}

    # the three scorers that share the zero_division=0 setting
    for key, scorer in (
        ("precision", precision_score),
        ("recall", recall_score),
        ("f1", f1_score),
    ):
        report[key] = scorer(y_true, y_pred, zero_division=0)

    report["mcc"] = matthews_corrcoef(y_true, y_pred)

    # fairness part: gap plus the two per-group positive rates
    gap, rate_a0, rate_a1 = demographic_parity_gap(y_pred, A)
    report["dp_gap"] = gap
    report["dp_p0"] = rate_a0
    report["dp_p1"] = rate_a1

    report["confusion_matrix"] = confusion_matrix(y_true, y_pred)
    return report

''' 
============================================================
 AdaBoost implementation w/ Perceptron
============================================================
'''
class AdaBoostClassifier(BaseEstimator, ClassifierMixin):
    '''
    Binary AdaBoost whose per-round error is inflated by a demographic
    parity penalty.

    Every round fits a clone of base_estimator on the current sample
    weights, measures its weighted error, adds
    fairness_lambda * max(0, dp_gap - dp_tolerance) to that error and
    derives the learner weight alpha from the combined value. Boosting stops
    at the first round whose alpha is <= 0.

    Parameters
    ----------
    base_estimator  : estimator whose fit() accepts sample_weight (required)
    n_estimators    : maximum number of boosting rounds
    learning_rate   : multiplier applied to every alpha
    fairness_lambda : weight of the demographic parity penalty
    dp_tolerance    : gap that is tolerated before the penalty applies
    random_state    : stored for API compatibility; not used by fit()
    '''

    def __init__(self,
                 base_estimator=None,
                 n_estimators=50,
                 learning_rate=1.0,
                 fairness_lambda=0.0,
                 dp_tolerance=0.0,
                 random_state=None):

        self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.random_state = random_state
        self.fairness_lambda = fairness_lambda
        self.dp_tolerance = dp_tolerance

        # fitted state; rebuilt from scratch by every call to fit()
        self.estimators_ = []
        self.estimator_weights_ = []
        self.classes_ = None

    def check_binary(self, y):
        '''
        Store the two class labels in self.classes_ and return y encoded as
        0/1, where 1 marks classes_[1] (the second label in np.unique order).

        Raises ValueError if y does not hold exactly two distinct values.
        '''
        y = np.asarray(y)
        classes = np.unique(y)
        if len(classes) != 2:
            raise ValueError("Binary classification only.")

        self.classes_ = classes
        return (y == classes[1]).astype(int)

    def fit(self, X, y, A=None):
        '''
        fit with fairness modification using demographic parity gap.
        A is the sensitive attribute (0/1); the gap is measured on each
        round's weak-learner predictions over the training data.

        Raises ValueError if base_estimator or A is missing, if X, y and A
        do not have the same number of samples, or if y is not binary.
        Returns self.
        '''
        # local import so the block does not depend on a file-level import
        import warnings

        if self.base_estimator is None:
            raise ValueError("base_estimator must be provided.")

        if A is None:
            raise ValueError("Sensitive attribute A must be provided for fairness-aware training.")

        X = np.asarray(X)
        y = np.asarray(y)
        A = np.asarray(A)

        n_samples = X.shape[0]
        # fail early with a clear message instead of an indexing error
        # somewhere inside the boosting loop
        if y.shape[:1] != (n_samples,) or A.shape[:1] != (n_samples,):
            raise ValueError(
                "X, y and A must have the same number of samples "
                f"(got {n_samples}, {y.shape[:1]}, {A.shape[:1]})."
            )

        y01 = self.check_binary(y)

        # start from uniform weights
        sample_weight = np.ones(n_samples) / n_samples

        self.estimators_ = []
        self.estimator_weights_ = []

        for _ in range(self.n_estimators):
            est = clone(self.base_estimator)
            est.fit(X, y01, sample_weight=sample_weight)

            y_pred = est.predict(X).astype(int)
            incorrect = (y_pred != y01).astype(float)

            # weighted misclassification rate, kept away from 0 and 1 so the
            # log-odds below stays finite
            err = np.dot(sample_weight, incorrect) / sample_weight.sum()
            err = np.clip(err, 1e-10, 1 - 1e-10)

            # fairness penalty: dp_gap is the absolute difference of positive
            # rates between A=0 and A=1 for this round's weak learner
            dp_gap, _, _ = demographic_parity_gap(y_pred, A)

            # only the part of the gap above dp_tolerance is penalised
            fairness_penalty = self.fairness_lambda * max(0.0, dp_gap - self.dp_tolerance)

            combined_err = np.clip(err + fairness_penalty, 1e-10, 1 - 1e-10)

            alpha = 0.5 * np.log((1 - combined_err) / combined_err) * self.learning_rate

            # combined error >= 0.5: this learner would get no positive
            # weight, so stop boosting here
            if alpha <= 0:
                break

            # labels and predictions mapped to -1/+1: misclassified samples
            # are up-weighted, correctly classified ones down-weighted
            sample_weight *= np.exp(-alpha * (2 * y01 - 1) * (2 * y_pred - 1))
            sample_weight /= sample_weight.sum()

            self.estimators_.append(est)
            self.estimator_weights_.append(alpha)

        if not self.estimators_:
            # Without this the caller gets a "fitted" model that silently
            # predicts classes_[1] for every sample (all scores are 0).
            warnings.warn(
                "No weak learner was kept (n_estimators is 0 or the first "
                "round's combined error was >= 0.5); predict() will return "
                "classes_[1] for every sample.",
                RuntimeWarning,
                stacklevel=2,
            )

        self.estimator_weights_ = np.array(self.estimator_weights_)
        return self

    def scores(self, X):
        '''
        Return the alpha-weighted sum of the weak learners' -1/+1 votes for
        every row of X (all zeros when the ensemble is empty).
        '''
        X = np.asarray(X)
        scores = np.zeros(X.shape[0])

        for alpha, est in zip(self.estimator_weights_, self.estimators_):
            pred = est.predict(X).astype(int)
            pred_pm1 = 2 * pred - 1
            scores += alpha * pred_pm1

        return scores

    def predict(self, X):
        '''
        Return class labels: classes_[1] where the score is >= 0, otherwise
        classes_[0].
        '''
        scores = self.scores(X)
        y01 = (scores >= 0).astype(int)
        return np.where(y01 == 1, self.classes_[1], self.classes_[0])

    def predict_proba(self, X):
        '''
        Return an (n_samples, 2) array [P(classes_[0]), P(classes_[1])]
        obtained by passing 2 * score through a logistic function.
        '''
        scores = self.scores(X)
        # 1 / (1 + exp(-2s)) written as exp(-log(1 + exp(-2s))) so that large
        # negative scores do not overflow inside np.exp
        p_pos = np.exp(-np.logaddexp(0.0, -2.0 * scores))
        return np.vstack([1 - p_pos, p_pos]).T

# data pre-processing
# Folder holding the CSV files read by the dataset loaders below
# (UCI_Credit_Card.csv and german_credit_data.csv are joined onto this path).
DATA_DIR = "/Workspace/Users/alexandra.mangune@gmail.com/Masters/AI201/Mini-Project/" # TODO: modify this path to match your folder path

def load_taiwan_dataset(test_size=0.3, val_size=0.2, random_state=42, sensitive="sex"):
    '''
    Load the Taiwan dataset (UCI Credit Card) and split it into
    train / validation / test parts.

    test_size, val_size : fractions of the full dataset used for the test
                          and validation parts
    random_state        : seed passed to both train_test_split calls
    sensitive           : str, "sex" or "age" (case-insensitive); any other
                          value falls back to the age groups

    Returns (X_train, X_val, X_test, y_train, y_val, y_test,
             A_train, A_val, A_test, scaler) where scaler is the
    StandardScaler fitted on the numeric training columns.

    Note: only the target column is removed from the features, so SEX, AGE
    and the derived SEX_BIN / AGE_NUM / AGE_GROUP columns are all part of X.
    '''
    target_col = "default.payment.next.month"

    path = os.path.join(DATA_DIR, "UCI_Credit_Card.csv")
    df = pd.read_csv(path)
    df = df.drop(columns=["ID"])

    # Target
    y = df[target_col].values

    # Sensitive attributes: SEX code 1 -> 1, code 2 -> 0; AGE bucketed into
    # five groups (<=29, 30-39, 40-49, 50-59, 60+) coded 0..4
    df["SEX_BIN"] = df["SEX"].map({1: 1, 2: 0})
    df["AGE_NUM"] = df["AGE"]
    df["AGE_GROUP"] = pd.cut(
        df["AGE"], bins=[0, 29, 39, 49, 59, 120], labels=[0, 1, 2, 3, 4]
    ).astype(int)

    if sensitive.lower() == "sex":
        A = df["SEX_BIN"].values
    else:
        A = df["AGE_GROUP"].values

    df_features = df.drop(columns=[target_col])
    X = pd.get_dummies(df_features, drop_first=True)

    # train/validate/test split, stratified on the target both times
    X_temp, X_test, y_temp, y_test, A_temp, A_test = train_test_split(
        X, y, A,
        test_size=test_size,
        stratify=y,
        random_state=random_state,
    )

    # val_size is a fraction of the full data; rescale it to the remainder
    val_rel = val_size / (1.0 - test_size)
    X_train, X_val, y_train, y_val, A_train, A_val = train_test_split(
        X_temp, y_temp, A_temp,
        test_size=val_rel,
        stratify=y_temp,
        random_state=random_state,
    )

    # The split frames are selections of X; take explicit copies so the
    # column assignments below write to independent frames and do not
    # trigger pandas' SettingWithCopyWarning.
    X_train, X_val, X_test = X_train.copy(), X_val.copy(), X_test.copy()

    # scaler statistics come from the training part only
    scaler = StandardScaler()
    num_cols = X_train.select_dtypes(include=[np.number]).columns
    X_train[num_cols] = scaler.fit_transform(X_train[num_cols])
    X_val[num_cols] = scaler.transform(X_val[num_cols])
    X_test[num_cols] = scaler.transform(X_test[num_cols])

    return (
        X_train, X_val, X_test,
        y_train, y_val, y_test,
        A_train, A_val, A_test,
        scaler,
    )

def load_german_dataset(test_size=0.3, val_size=0.2, random_state=42, sensitive="sex"):
    '''
    Load the German dataset (German Credit) and split it into
    train / validation / test parts.

    test_size, val_size : fractions of the full dataset used for the test
                          and validation parts
    random_state        : seed passed to both train_test_split calls
    sensitive           : str, "sex" or "age" (case-insensitive); any other
                          value falls back to the age groups

    The binary target is taken from a "risk" column (good -> 0, bad -> 1)
    or, failing that, a label-encoded "credit_risk" column. Column names are
    stripped and lower-cased before lookup.

    Returns (X_train, X_val, X_test, y_train, y_val, y_test,
             A_train, A_val, A_test, scaler).

    Raises ValueError if no target column is found or "risk" holds values
    other than good / bad.
    '''
    path = os.path.join(DATA_DIR, "german_credit_data.csv")
    df = pd.read_csv(path)
    df.columns = df.columns.str.strip().str.lower()

    # An index written by DataFrame.to_csv comes back as "unnamed: 0"; it is
    # a row counter, not a feature. No-op if the file has no such column.
    df = df.drop(columns=[c for c in df.columns if c.startswith("unnamed")])

    # Target: a class label is required. The previous version used the
    # continuous "credit amount" column, which cannot be used for
    # stratify= in train_test_split and is not a classification target.
    if "risk" in df.columns:
        risk = df["risk"].astype(str).str.strip().str.lower().map({"good": 0, "bad": 1})
        if risk.isna().any():
            raise ValueError("Unexpected values in 'risk' column of German dataset")
        y = risk.astype(int).values
        target_cols = ["risk"]
    elif "credit_risk" in df.columns:
        y = LabelEncoder().fit_transform(df["credit_risk"])
        target_cols = ["credit_risk"]
    else:
        raise ValueError("Cannot find target column in German dataset")

    # Sensitive attributes: male -> 1, female -> 0; age bucketed into five
    # groups (<=25, 26-35, 36-45, 46-60, 61+) coded 0..4
    df["sex_bin"] = df["sex"].map({"male": 1, "female": 0})

    df["age_num"] = df["age"]
    df["age_group"] = pd.cut(
        df["age"], bins=[0, 25, 35, 45, 60, 120], labels=[0, 1, 2, 3, 4]
    ).astype(int)

    if sensitive.lower() == "sex":
        A = df["sex_bin"].values
    else:
        A = df["age_group"].values

    # the raw sex / age columns are replaced by the derived ones; the target
    # must not leak into the features
    drop_cols = target_cols + ["sex", "age"]
    df_features = df.drop(columns=[c for c in drop_cols if c in df.columns])

    X = pd.get_dummies(df_features, drop_first=True)

    # train/val/test split, stratified on the target both times
    X_temp, X_test, y_temp, y_test, A_temp, A_test = train_test_split(
        X, y, A,
        test_size=test_size,
        stratify=y,
        random_state=random_state,
    )

    # val_size is a fraction of the full data; rescale it to the remainder
    val_rel = val_size / (1.0 - test_size)
    X_train, X_val, y_train, y_val, A_train, A_val = train_test_split(
        X_temp, y_temp, A_temp,
        test_size=val_rel,
        stratify=y_temp,
        random_state=random_state,
    )

    # explicit copies so the column assignments below write to independent
    # frames and do not trigger pandas' SettingWithCopyWarning
    X_train, X_val, X_test = X_train.copy(), X_val.copy(), X_test.copy()

    # scaler statistics come from the training part only
    scaler = StandardScaler()
    num_cols = X_train.select_dtypes(include=[np.number]).columns
    X_train[num_cols] = scaler.fit_transform(X_train[num_cols])
    X_val[num_cols] = scaler.transform(X_val[num_cols])
    X_test[num_cols] = scaler.transform(X_test[num_cols])

    return (
        X_train, X_val, X_test,
        y_train, y_val, y_test,
        A_train, A_val, A_test,
        scaler,
    )

def load_dataset(name, sensitive="sex", **kwargs):
    '''
    Dispatch to load_taiwan_dataset() or load_german_dataset() by name.

    name is matched case-insensitively against "taiwan" and "german";
    sensitive and any extra keyword arguments are forwarded to the loader.
    Raises ValueError for any other name.
    '''
    key = name.lower()

    # guard clause: reject unknown names before touching any loader
    if key not in ("taiwan", "german"):
        raise ValueError(f"Unknown dataset: {name}")

    loader = load_taiwan_dataset if key == "taiwan" else load_german_dataset
    return loader(sensitive=sensitive, **kwargs)

# run training and testing sets; validate the outputs
def run_experiment(
    dataset_name,
    sensitive="sex",
    lambdas=(0.0, 0.1, 0.3, 0.5, 1.0),
    n_estimators=50,
    dp_tolerance=0.05, # for fairness modifications
):
    '''
    Run the fairness-aware AdaBoost experiment on one dataset.

    For every value in lambdas a boosted Perceptron ensemble is trained on
    the training split and scored on the validation split. The lambda with
    the highest validation F1 is then refit on train + validation and
    evaluated on the test split. Progress and metrics are printed.

    Returns (val_results, test_metrics): the list of per-lambda validation
    metric dicts (each with an added "lambda" key) and the test metric dict.
    '''
    (X_train, X_val, X_test,
     y_train, y_val, y_test,
     A_train, A_val, A_test,  # sensitive attribute per split
     scaler) = load_dataset(dataset_name, sensitive=sensitive)

    print(f"Dataset: {dataset_name}, sensitive: {sensitive}")
    print("Train shape:", X_train.shape, "Val:", X_val.shape, "Test:", X_test.shape)

    # weak learner: a Perceptron rather than a decision tree stump
    base = Perceptron(
        max_iter=1000,
        eta0=0.01,
        random_state=0,
        tol=1e-3,
        fit_intercept=True
    )

    def build_booster(lam):
        # one ensemble configuration, differing only in the fairness weight
        return AdaBoostClassifier(
            base_estimator=base,
            n_estimators=n_estimators,
            learning_rate=1.0,
            fairness_lambda=lam,
            dp_tolerance=dp_tolerance,
        )

    # ---- sweep lambda on the validation split ----
    val_results = []
    for lam in lambdas:
        booster = build_booster(lam)
        booster.fit(X_train, y_train, A_train)

        scores = compute_metrics(y_val, booster.predict(X_val), A_val)
        scores["lambda"] = lam
        val_results.append(scores)

        summary = (
            f"lambda={lam:.2f} | "
            f"Acc={scores['accuracy']:.3f}, F1={scores['f1']:.3f}, "
            f"MCC={scores['mcc']:.3f}"
        )
        print(summary)

    # ---- pick the lambda with the best validation F1 ----
    f1_per_lambda = [entry["f1"] for entry in val_results]
    best_lam = val_results[np.argmax(f1_per_lambda)]["lambda"]
    print("\nBest lambda on validation (by F1):", best_lam)

    # ---- refit on train + validation, evaluate on test ----
    best_model = build_booster(best_lam)
    best_model.fit(
        pd.concat([X_train, X_val], axis=0),
        np.concatenate([y_train, y_val]),
        np.concatenate([A_train, A_val]),
    )

    test_metrics = compute_metrics(y_test, best_model.predict(X_test), A_test)
    print("\n=== Test metrics with best lambda ===")
    for key, value in test_metrics.items():
        if key == "confusion_matrix":
            print(key, "=\n", value)
            continue
        if isinstance(value, float):
            print(f"{key}: {value:.4f}")
        else:
            print(f"{key}: {value}")

    return val_results, test_metrics

# plot figures
def plot_tradeoff(val_results, title_suffix=""):
    '''
    Plot validation accuracy against the fairness weight lambda.

    val_results  : list of metric dicts, each with "lambda" and "accuracy"
    title_suffix : text appended to the figure title
    '''
    lam_values = [entry["lambda"] for entry in val_results]
    acc_values = [entry["accuracy"] for entry in val_results]

    # new figure; draw through its current axes
    plt.figure()
    axes = plt.gca()
    axes.plot(lam_values, acc_values, marker="o")
    axes.set_xlabel("lambda (fairness_lambda)")
    axes.set_ylabel("Validation Accuracy")
    axes.set_title(f"Accuracy vs lambda {title_suffix}")
    axes.grid(True)
    plt.show()
# run experiments

''' 
============================================================
 Run AdaBoost experiments (Perceptron base learner)
============================================================
'''

# Taiwan dataset fairness on sex
val_res_taiwan, test_taiwan = run_experiment(
    dataset_name="taiwan",
    sensitive="sex",
    lambdas=(0.0, 0.1, 0.3, 0.5, 1.0),
    n_estimators=40,
    dp_tolerance=0.05, # for fairness modifications
)
# plot_tradeoff(val_res_taiwan, title_suffix="(Taiwan, Sex)")

# Taiwan dataset fairness on age
# NOTE: this reuses the names val_res_taiwan / test_taiwan, so the results of
# the sex run above are overwritten once this call returns.
val_res_taiwan, test_taiwan = run_experiment(
    dataset_name="taiwan",
    sensitive="age",
    lambdas=(0.0, 0.1, 0.3, 0.5, 1.0),
    n_estimators=40,
    dp_tolerance=0.05, # for fairness modifications
)
# plot_tradeoff(val_res_taiwan, title_suffix="(Taiwan, Age)")

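# TODO: the German AdaBoost runs below are commented out because they raised an error; re-enable them once load_dataset("german") runs cleanly.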
# # German dataset fairness on sex
# val_res_german, test_german = run_experiment(
#     dataset_name="german",
#     sensitive="sex",
#     lambdas=(0.0, 0.1, 0.3, 0.5, 1.0),
#     n_estimators=40,
#     dp_tolerance=0.05, # for fairness modifications
# )
# # plot_tradeoff(val_res_german, title_suffix="(German, Sex)")

# # German dataset fairness on age
# val_res_german, test_german = run_experiment(
#     dataset_name="german",
#     sensitive="age",
#     lambdas=(0.0, 0.1, 0.3, 0.5, 1.0),
#     n_estimators=40,
#     dp_tolerance=0.05, # for fairness modifications
# )
# # plot_tradeoff(val_res_german, title_suffix="(German, Age)")


'''
============================================================
Baseline ANN model using MLPClassifier (2 hidden layers)
============================================================
'''

def run_mlp_baseline(
    dataset_name,
    sensitive="sex",
    hidden_layer_sizes=(64, 32),
    max_iter=500,
    random_state=0,
):
    '''
    Train a plain (non-boosted) MLPClassifier and report validation and
    test metrics, including the fairness metrics.

    The network uses relu activations and the adam solver with
    learning_rate_init=0.001; hidden_layer_sizes, max_iter and random_state
    are passed through to MLPClassifier.

    Returns (val_metrics, test_metrics, mlp) where mlp is the fitted model.
    '''
    (X_train, X_val, X_test,
     y_train, y_val, y_test,
     A_train, A_val, A_test,
     scaler) = load_dataset(dataset_name, sensitive=sensitive)

    print(f"MLP baseline | Dataset: {dataset_name}, sensitive: {sensitive}")
    print("Train shape:", X_train.shape, "Val:", X_val.shape, "Test:", X_test.shape)

    mlp = MLPClassifier(
        hidden_layer_sizes=hidden_layer_sizes,
        activation="relu",
        solver="adam",
        learning_rate_init=0.001,
        max_iter=max_iter,
        random_state=random_state,
    )
    mlp.fit(X_train, y_train)

    # score both held-out splits before printing anything
    val_metrics = compute_metrics(y_val, mlp.predict(X_val), A_val)
    test_metrics = compute_metrics(y_test, mlp.predict(X_test), A_test)

    reports = (
        ("\n=== MLP Validation metrics ===", val_metrics),
        ("\n=== MLP Test metrics ===", test_metrics),
    )
    for banner, metrics in reports:
        print(banner)
        for key, value in metrics.items():
            if key == "confusion_matrix":
                print(key, "=\n", value)
            elif isinstance(value, float):
                print(f"{key}: {value:.4f}")
            else:
                print(f"{key}: {value}")

    return val_metrics, test_metrics, mlp

# sample MLP runs (can remove later)

# Each call below trains one MLP baseline and keeps its validation metrics,
# test metrics and fitted model under dataset/attribute-specific names.

# Taiwan dataset, sensitive attribute: sex
mlp_val_taiwan_sex, mlp_test_taiwan_sex, mlp_model_taiwan_sex = run_mlp_baseline(
    dataset_name="taiwan",
    sensitive="sex",
    hidden_layer_sizes=(64, 32),
    max_iter=500,
    random_state=0,
)

# Taiwan dataset, sensitive attribute: age
mlp_val_taiwan_age, mlp_test_taiwan_age, mlp_model_taiwan_age = run_mlp_baseline(
    dataset_name="taiwan",
    sensitive="age",
    hidden_layer_sizes=(64, 32),
    max_iter=500,
    random_state=0,
)

# German dataset, sensitive attribute: sex
# NOTE(review): the cell output recorded with this notebook ends in a
# ValueError traceback that appears to point at this call; confirm the
# German loader runs before relying on the two German baselines.
mlp_val_german_sex, mlp_test_german_sex, mlp_model_german_sex = run_mlp_baseline(
    dataset_name="german",
    sensitive="sex",
    hidden_layer_sizes=(64, 32),
    max_iter=500,
    random_state=0,
)

# German dataset, sensitive attribute: age
mlp_val_german_age, mlp_test_german_age, mlp_model_german_age = run_mlp_baseline(
    dataset_name="german",
    sensitive="age",
    hidden_layer_sizes=(64, 32),
    max_iter=500,
    random_state=0,
)

Dataset: taiwan, sensitive: sex
Train shape: (14999, 26) Val: (6001, 26) Test: (9000, 26)
lambda=0.00 | Acc=0.741, F1=0.434, MCC=0.266
lambda=0.10 | Acc=0.741, F1=0.434, MCC=0.266
lambda=0.30 | Acc=0.741, F1=0.434, MCC=0.266
lambda=0.50 | Acc=0.741, F1=0.434, MCC=0.266
lambda=1.00 | Acc=0.741, F1=0.434, MCC=0.266

Best lambda on validation (by F1): 0.0

=== Test metrics with best lambda ===
accuracy: 0.6868
precision: 0.2922
recall: 0.2923
f1: 0.2922
mcc: 0.0911
dp_gap: 0.2577
dp_p0: 0.3233
dp_p1: 0.0657
confusion_matrix =
 [[5599 1410]
 [1409  582]]
Dataset: taiwan, sensitive: age
Train shape: (14999, 26) Val: (6001, 26) Test: (9000, 26)
lambda=0.00 | Acc=0.741, F1=0.434, MCC=0.266
lambda=0.10 | Acc=0.741, F1=0.434, MCC=0.266
lambda=0.30 | Acc=0.741, F1=0.434, MCC=0.266
lambda=0.50 | Acc=0.741, F1=0.434, MCC=0.266
lambda=1.00 | Acc=0.741, F1=0.434, MCC=0.266

Best lambda on validation (by F1): 0.0

=== Test metrics with best lambda ===
accuracy: 0.6868
precision: 0.2922
recall: 0.2923




=== MLP Validation metrics ===
accuracy: 0.7689
precision: 0.4690
recall: 0.3421
f1: 0.3956
mcc: 0.2619
dp_gap: 0.0224
dp_p0: 0.1525
dp_p1: 0.1750
confusion_matrix =
 [[4160  514]
 [ 873  454]]

=== MLP Test metrics ===
accuracy: 0.7714
precision: 0.4771
recall: 0.3451
f1: 0.4005
mcc: 0.2690
dp_gap: 0.0069
dp_p0: 0.1573
dp_p1: 0.1642
confusion_matrix =
 [[6256  753]
 [1304  687]]
MLP baseline | Dataset: taiwan, sensitive: age
Train shape: (14999, 26) Val: (6001, 26) Test: (9000, 26)





=== MLP Validation metrics ===
accuracy: 0.7689
precision: 0.4690
recall: 0.3421
f1: 0.3956
mcc: 0.2619
dp_gap: 0.0140
dp_p0: 0.1599
dp_p1: 0.1459
confusion_matrix =
 [[4160  514]
 [ 873  454]]

=== MLP Test metrics ===
accuracy: 0.7714
precision: 0.4771
recall: 0.3451
f1: 0.4005
mcc: 0.2690
dp_gap: 0.0103
dp_p0: 0.1541
dp_p1: 0.1438
confusion_matrix =
 [[6256  753]
 [1304  687]]


[0;31m---------------------------------------------------------------------------[0m
[0;31mValueError[0m                                Traceback (most recent call last)
File [0;32m<command-196705736324581>, line 547[0m
[1;32m    531[0m mlp_val_taiwan_sex, mlp_test_taiwan_sex, mlp_model_taiwan_sex [38;5;241m=[39m run_mlp_baseline(
[1;32m    532[0m     dataset_name[38;5;241m=[39m[38;5;124m"[39m[38;5;124mtaiwan[39m[38;5;124m"[39m,
[1;32m    533[0m     sensitive[38;5;241m=[39m[38;5;124m"[39m[38;5;124msex[39m[38;5;124m"[39m,
[0;32m   (...)[0m
[1;32m    536[0m     random_state[38;5;241m=[39m[38;5;241m0[39m,
[1;32m    537[0m )
[1;32m    539[0m mlp_val_taiwan_age, mlp_test_taiwan_age, mlp_model_taiwan_age [38;5;241m=[39m run_mlp_baseline(
[1;32m    540[0m     dataset_name[38;5;241m=[39m[38;5;124m"[39m[38;5;124mtaiwan[39m[38;5;124m"[39m,
[1;32m    541[0m     sensitive[38;5;241m=[39m[38;5;124m"[39m[38;5;124mage[39m[38;5;124m"[39m,
