In [236]:
%pip install -qU fairlearn pandas scikit-learn scipy matplotlib seaborn ucimlrepo ipywidgets

Note: you may need to restart the kernel to use updated packages.


In [237]:
import pandas as pd
from pandas import DataFrame
import numpy as np
from ucimlrepo import fetch_ucirepo

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from scipy import stats

from typing import Optional, Callable, List, Tuple, Mapping
from IPython.display import Markdown, display

In [238]:
def load_uci_dataset(
    id: int,
    get_column_names: Optional[Callable[[np.ndarray], List[str]]] = None,
) -> DataFrame:
    dataset = fetch_ucirepo(id=id)
    df = dataset.data.original

    # Display metadata and variable information
    print("Dataset Metadata:")
    print(dataset.metadata)
    print("\nVariable Information:")
    print(dataset.variables)

    if get_column_names is not None:
        column_names = get_column_names(dataset.variables)
        print(df.columns)
        df.columns = column_names

    # Rename the last column to "target"
    df = df.rename(columns={df.columns[-1]: "target"})

    print(df.columns.values)

    # Return the complete DataFrame with all features and target column labeled "target"
    return df

In [239]:
dataset_names = ["Adult", "Credit", "Bank"]

In [240]:
adult = load_uci_dataset(2)
credit_card = load_uci_dataset(
    350, lambda dataset_variables: dataset_variables["description"].str.lower()  # Ignore ID column
)
bank_marketing = load_uci_dataset(222)

Dataset Metadata:
{'uci_id': 2, 'name': 'Adult', 'repository_url': 'https://archive.ics.uci.edu/dataset/2/adult', 'data_url': 'https://archive.ics.uci.edu/static/public/2/data.csv', 'abstract': 'Predict whether annual income of an individual exceeds $50K/yr based on census data. Also known as "Census Income" dataset. ', 'area': 'Social Science', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 48842, 'num_features': 14, 'feature_types': ['Categorical', 'Integer'], 'demographics': ['Age', 'Income', 'Education Level', 'Other', 'Race', 'Sex'], 'target_col': ['income'], 'index_col': None, 'has_missing_values': 'yes', 'missing_values_symbol': 'NaN', 'year_of_dataset_creation': 1996, 'last_updated': 'Tue Sep 24 2024', 'dataset_doi': '10.24432/C5XW20', 'creators': ['Barry Becker', 'Ronny Kohavi'], 'intro_paper': None, 'additional_info': {'summary': "Extraction was done by Barry Becker from the 1994 Census database.  A set of reasonably clean records was extra

In [241]:
for name, dataset in zip(dataset_names, [adult, credit_card, bank_marketing]):
    print(f"Dataset: {name}")
    print(dataset.head())
    print(dataset.info())
    print("\n---\n")

Dataset: Adult
   age         workclass  fnlwgt  ... hours-per-week  native-country target
0   39         State-gov   77516  ...             40   United-States  <=50K
1   50  Self-emp-not-inc   83311  ...             13   United-States  <=50K
2   38           Private  215646  ...             40   United-States  <=50K
3   53           Private  234721  ...             40   United-States  <=50K
4   28           Private  338409  ...             40            Cuba  <=50K

[5 rows x 15 columns]
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48842 entries, 0 to 48841
Data columns (total 15 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   age             48842 non-null  int64 
 1   workclass       47879 non-null  object
 2   fnlwgt          48842 non-null  int64 
 3   education       48842 non-null  object
 4   education-num   48842 non-null  int64 
 5   marital-status  48842 non-null  object
 6   occupation      47876 non-null  object
 

In [242]:
def prepare_dataset(
    df: DataFrame,
    features: List[str],
    sensitive_attribute: str,
    columns_names: Optional[List[str]] = None,
    convert_target: Optional[Callable[[str], int]] = None,
) -> Tuple[np.ndarray[np.ndarray], np.ndarray, np.ndarray]:
    attributes = np.concatenate((np.array(features), np.array([sensitive_attribute, "target"])))
    print(attributes)
    df = df[attributes]
    print(df)
    df = df.dropna()

    X = df[features]
    y = df["target"]

    if convert_target is not None:
        y = y.apply(convert_target).astype(int)

    if columns_names is not None:
        X.columns = columns_names

    sensitive_series = df[sensitive_attribute]  # e.g. "sex" or "race"

    # Normalize features
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    return X, y.values, sensitive_series.values

In [243]:
features_adult = ["age", "education-num", "capital-gain", "capital-loss", "hours-per-week"]

features_credit_card = ["limit_bal", "education", "age", "pay_0", *[f"pay_{i}" for i in range(2, 7)], *[f"bill_amt{i}" for i in range(1, 7)], *[f"pay_amt{i}" for i in range(1, 7)]]

# "duration" not included as specified on https://archive.ics.uci.edu/dataset/222/bank+marketing
features_bank_marketing = ["age", "balance", "campaign", "pdays", "previous"]

adult_preprocessed = prepare_dataset(
    adult,
    features=features_adult,
    sensitive_attribute="sex",
    convert_target=lambda income: income == ">50K",
)
credit_preprocessed = prepare_dataset(
    credit_card,
    features_credit_card,
    sensitive_attribute="sex",
)
bank_preprocessed = prepare_dataset(
    bank_marketing,
    features_bank_marketing,
    sensitive_attribute="marital",
    convert_target=lambda default: default == "yes",
)

['age' 'education-num' 'capital-gain' 'capital-loss' 'hours-per-week'
 'sex' 'target']
       age  education-num  capital-gain  ...  hours-per-week     sex  target
0       39             13          2174  ...              40    Male   <=50K
1       50             13             0  ...              13    Male   <=50K
2       38              9             0  ...              40    Male   <=50K
3       53              7             0  ...              40    Male   <=50K
4       28             13             0  ...              40  Female   <=50K
...    ...            ...           ...  ...             ...     ...     ...
48837   39             13             0  ...              36  Female  <=50K.
48838   64              9             0  ...              40    Male  <=50K.
48839   38             13             0  ...              50    Male  <=50K.
48840   44             13          5455  ...              40    Male  <=50K.
48841   35             13             0  ...              60    Ma

In [244]:
for arr in adult_preprocessed:
    print(arr)

[[ 0.02599598  1.136512    0.14693247 -0.2171271  -0.03408696]
 [ 0.82830842  1.136512   -0.14480353 -0.2171271  -2.21303208]
 [-0.04694151 -0.41933527 -0.14480353 -0.2171271  -0.03408696]
 ...
 [-0.04694151  1.136512   -0.14480353 -0.2171271   0.77292975]
 [ 0.39068346  1.136512    0.58722034 -0.2171271  -0.03408696]
 [-0.26575399  1.136512   -0.14480353 -0.2171271   1.57994645]]
[0 0 0 ... 0 0 0]
['Male' 'Male' 'Male' ... 'Male' 'Male' 'Male']


In [245]:
RANDOM_STATE = 42   # for reproducibility

In [246]:

def assess_fairness(X, y, sensitive_attribute, model, k=20):
    kf = KFold(n_splits=k, shuffle=True, random_state=RANDOM_STATE)

    metrics = {
        group: {"TPR": [], "FPR": [], "FN_FP_ratio": []}
        for group in np.unique(sensitive_attribute)
    }

    total_accuracy = 0    # Not for statistical testing, just for visualization later
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        _sensitive_train, sensitive_test = (
            sensitive_attribute[train_index],
            sensitive_attribute[test_index],
        )

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        for group in np.unique(sensitive_attribute):
            group_mask = sensitive_test == group
            tn, fp, fn, tp = confusion_matrix(
                y_test[group_mask], y_pred[group_mask]
            ).ravel()
            f1 = f1_score(y_test[group_mask], y_pred[group_mask])

            tpr = tp / (tp + fn) if (tp + fn) > 0 else 0
            fpr = fp / (fp + tn) if (fp + tn) > 0 else 0
            fn_fp_ratio = fn / fp if fp > 0 else 0

            metrics[group]["TPR"].append(tpr)
            metrics[group]["FPR"].append(fpr)
            metrics[group]["FN_FP_ratio"].append(fn_fp_ratio)

        total_accuracy += accuracy_score(y_test, y_pred)
    metrics["accuracy"] = total_accuracy / k

    return metrics

In [247]:
METRICS_TO_TEST = ["TPR", "FPR", "FN_FP_ratio"]

def statistical_test(metrics, groups):
    results = {}
    if len(groups) == 2:
        results = statistical_t_test(metrics, *groups)
    else:
        results = statistical_anova_test(metrics, *groups)
    results["accuracy"] = metrics["accuracy"]
    return results

def statistical_anova_test(metrics, *groups):
    # Perform one-way ANOVA for more than two groups
    results = {}
    for metric in METRICS_TO_TEST:
        data = []
        group_labels = []
        for group in groups:
            data.extend(metrics[group][metric])
            group_labels.extend([group] * len(metrics[group][metric]))
        
        f_stat, p_value = stats.f_oneway(*[metrics[group][metric] for group in groups])
        results[metric] = {
            "test": "ANOVA",
            "statistic": f_stat,
            "p_value": p_value
        }

        # Perform post-hoc Tukey's HSD test
        tukey_results = stats.tukey_hsd(*[metrics[group][metric] for group in groups])
        results[metric]["post_hoc"] = tukey_results
    return results

def statistical_t_test(metrics, group1, group2):
    results = {}
    for metric in METRICS_TO_TEST:
        t_stat, p_value = stats.ttest_ind(
            metrics[group1][metric], metrics[group2][metric]
        )
        results[metric] = {
            "test": "t-test",
            "statistic": t_stat,
            "p_value": p_value
        }
    return results

In [248]:
USE_ALL_PROCESSORS = -1
models = {
    "Logistic Regression": LogisticRegression(random_state=RANDOM_STATE, n_jobs=USE_ALL_PROCESSORS),
    "Random Forest": RandomForestClassifier(random_state=RANDOM_STATE, n_jobs=USE_ALL_PROCESSORS),
    "Support Vector Machine": SVC(random_state=RANDOM_STATE),
    "K-Nearest Neighbors": KNeighborsClassifier(n_jobs=USE_ALL_PROCESSORS),
}
description_to_acronym = {
    "Logistic Regression": "LR",
    "Random Forest": "RF",
    "Support Vector Machine": "SVM",
    "K-Nearest Neighbors": "KNN",
}

In [249]:
STATISTIC_SIGNIFICANCE_MAXIMUM = 0.05

def run_dataset(X, y, sensitive_attribute, out_file = None):
    model_results = {}
    # Assess fairness for each model
    for i, (model_name, model) in enumerate(models.items()):
        print(f"\n\n### {i + 1}. Assessing fairness for {model_name}", file=out_file)
        metrics = assess_fairness(X, y, sensitive_attribute, model)

        # Print average metrics for each group
        groups = np.unique(sensitive_attribute)
        for group in groups:
            print(f"\n\nGroup: **{group}**", file=out_file)
            for metric, values in metrics[group].items():
                print(f"\n- {metric}: {np.mean(values):.4f}", file=out_file)

        # Perform statistical test
        results = statistical_test(metrics, groups)
        print("\n\n#### Statistical Test Results:", file=out_file)
        for metric in METRICS_TO_TEST:
            statistics = results[metric]
            print(f"\n- {metric}:", file=out_file)
            print(f"\n\t- Test: {statistics['test']}", file=out_file)
            print(f"\n\t- Statistic: {statistics['statistic']:.4f}", file=out_file)
            print(f"\n\t- p-value: {statistics['p_value']:.4f}", file=out_file)

            if statistics['test'] == "ANOVA" and statistics['p_value'] < STATISTIC_SIGNIFICANCE_MAXIMUM:
                print("\n\t- Post-hoc Tukey HSD Results:", file=out_file)
                print("\n```", file=out_file)
                print(statistics['post_hoc'], file=out_file)
                print("\n```", file=out_file)

        model_results[model_name] = results

        print(f"\n\n**Overall average accuracy**: {metrics["accuracy"]:.3f}", file=out_file)
    return model_results

In [250]:
output_file = "output.md"

In [251]:
# Could have redirected sys.stdout directly to a file, but this approach is more flexible
def run(out_file):
    print("# Training and testing models for biases", file=out_file)
    dataset_results = {}
    for name, dataset in zip(dataset_names, [adult_preprocessed, credit_preprocessed, bank_preprocessed]):
        print(f"## Dataset {name}", file=out_file)
        X, y, sensitive_attribute = dataset
        results = run_dataset(X, y, sensitive_attribute, out_file=out_file)
        dataset_results[name] = results
    return dataset_results

results = None
with open(output_file, "w") as f:
    results = run(f)

In [252]:
def format_p_value(p: float) -> str:
    if p >= STATISTIC_SIGNIFICANCE_MAXIMUM:
        return "**-**"
    limit = 0.0001
    return f"< {limit}" if p < limit else f"{p:.4f}"


def fill_dataset_table_rows(
    dict: Mapping[str, Mapping[str, float]], format_func: Callable[[float], str]
):
    row_str = ""
    for stat, values in dict.items():
        values.sort()  # Sort by the model acronym to ensure consistent ordering
        row_str += f"| {stat}"
        row_str += "| " + " | ".join(
            [f"{format_func(value)}" for _model, value in values]
        )
        row_str += " |\n"
    return row_str


table_str = (
    "| **Metrics** | "
    + " | ".join(
        map(
            lambda name: f"**{name}**",
            sorted(  # Sort by the model acronym to ensure consistent ordering
                map(
                    lambda model_desc: description_to_acronym[model_desc], models.keys()
                )
            ),
        )
    )
    + " |\n"
)
table_str += "| --- | " + " | ".join(["---" for _ in models.keys()]) + " |\n"
for dataset, dataset_results in results.items():
    table_str += f"| Dataset **{dataset}**  \n"

    statistic_values = {}
    score_values = {}
    for model, statistics in dataset_results.items():
        model = description_to_acronym[model]  # e.g.: Use "RF" instead of Random Forest
        for stat in METRICS_TO_TEST:
            values = statistics[stat]
            p_value = values["p_value"]
            statistic_values[stat] = statistic_values.get(stat, [])
            statistic_values[stat].append((model, p_value))

        score_values["Accuracy"] = score_values.get("Accuracy", [])
        score_values["Accuracy"].append((model, statistics["accuracy"]))

    table_str += fill_dataset_table_rows(statistic_values, format_p_value)
    table_str += fill_dataset_table_rows(score_values, lambda x: f"{x:.3f}")

output_file_table = "output_table.md"
with open(output_file_table, "w") as f:
    f.write(table_str)

Markdown(table_str)

| **Metrics** | **KNN** | **LR** | **RF** | **SVM** |
| --- | --- | --- | --- | --- |
| Dataset **Adult**  
| TPR| 0.0003 | < 0.0001 | 0.0244 | **-** |
| FPR| < 0.0001 | < 0.0001 | < 0.0001 | < 0.0001 |
| FN_FP_ratio| < 0.0001 | 0.0462 | < 0.0001 | **-** |
| Accuracy| 0.817 | 0.842 | 0.843 | 0.853 |
| Dataset **Credit**  
| TPR| **-** | **-** | **-** | **-** |
| FPR| 0.0002 | 0.0004 | < 0.0001 | 0.0030 |
| FN_FP_ratio| **-** | **-** | **-** | **-** |
| Accuracy| 0.794 | 0.810 | 0.815 | 0.820 |
| Dataset **Bank**  
| TPR| **-** | **-** | 0.0035 | 0.0009 |
| FPR| 0.0002 | **-** | < 0.0001 | 0.0102 |
| FN_FP_ratio| 0.0014 | 0.0004 | 0.0002 | < 0.0001 |
| Accuracy| 0.873 | 0.882 | 0.856 | 0.883 |


The next cell is the last one, and has an extensive output.

For each dataset, and for each model, it shows information about the different metrics $FPR$, $TPR$ and $\frac{FN}{FP}$. For each group, it will show the mean value of these metrics, as well as the statistical test result of comparing the values of the metric across groups.

In [253]:
with open(output_file) as f:
    display(Markdown(f.read()))

# Training and testing models for biases
## Dataset Adult


### 1. Assessing fairness for Logistic Regression


Group: **Female**

- TPR: 0.0863

- FPR: 0.0111

- FN_FP_ratio: 7.5206


Group: **Male**

- TPR: 0.1262

- FPR: 0.0248

- FN_FP_ratio: 9.2278


#### Statistical Test Results:

- TPR:

	- Test: t-test

	- Statistic: -4.8139

	- p-value: 0.0000

- FPR:

	- Test: t-test

	- Statistic: -11.0649

	- p-value: 0.0000

- FN_FP_ratio:

	- Test: t-test

	- Statistic: -2.0613

	- p-value: 0.0462


**Overall average accuracy**: 0.842


### 2. Assessing fairness for Random Forest


Group: **Female**

- TPR: 0.2556

- FPR: 0.0335

- FN_FP_ratio: 1.7676


Group: **Male**

- TPR: 0.2892

- FPR: 0.0596

- FN_FP_ratio: 3.0884


#### Statistical Test Results:

- TPR:

	- Test: t-test

	- Statistic: -2.3445

	- p-value: 0.0244

- FPR:

	- Test: t-test

	- Statistic: -15.3159

	- p-value: 0.0000

- FN_FP_ratio:

	- Test: t-test

	- Statistic: -12.4910

	- p-value: 0.0000


**Overall average accuracy**: 0.843


### 3. Assessing fairness for Support Vector Machine


Group: **Female**

- TPR: 0.1843

- FPR: 0.0092

- FN_FP_ratio: 9.2422


Group: **Male**

- TPR: 0.1793

- FPR: 0.0244

- FN_FP_ratio: 8.8223


#### Statistical Test Results:

- TPR:

	- Test: t-test

	- Statistic: 0.3714

	- p-value: 0.7124

- FPR:

	- Test: t-test

	- Statistic: -11.9265

	- p-value: 0.0000

- FN_FP_ratio:

	- Test: t-test

	- Statistic: 0.2874

	- p-value: 0.7754


**Overall average accuracy**: 0.853


### 4. Assessing fairness for K-Nearest Neighbors


Group: **Female**

- TPR: 0.3198

- FPR: 0.0723

- FN_FP_ratio: 0.7517


Group: **Male**

- TPR: 0.3661

- FPR: 0.1090

- FN_FP_ratio: 1.5051


#### Statistical Test Results:

- TPR:

	- Test: t-test

	- Statistic: -3.9426

	- p-value: 0.0003

- FPR:

	- Test: t-test

	- Statistic: -11.8540

	- p-value: 0.0000

- FN_FP_ratio:

	- Test: t-test

	- Statistic: -14.1194

	- p-value: 0.0000


**Overall average accuracy**: 0.817
## Dataset Credit


### 1. Assessing fairness for Logistic Regression


Group: **1**

- TPR: 0.2411

- FPR: 0.0327

- FN_FP_ratio: 8.1295


Group: **2**

- TPR: 0.2309

- FPR: 0.0228

- FN_FP_ratio: 9.8724


#### Statistical Test Results:

- TPR:

	- Test: t-test

	- Statistic: 1.0203

	- p-value: 0.3141

- FPR:

	- Test: t-test

	- Statistic: 3.8938

	- p-value: 0.0004

- FN_FP_ratio:

	- Test: t-test

	- Statistic: -1.6678

	- p-value: 0.1036


**Overall average accuracy**: 0.810


### 2. Assessing fairness for Random Forest


Group: **1**

- TPR: 0.3770

- FPR: 0.0675

- FN_FP_ratio: 3.0086


Group: **2**

- TPR: 0.3673

- FPR: 0.0532

- FN_FP_ratio: 3.1822


#### Statistical Test Results:

- TPR:

	- Test: t-test

	- Statistic: 0.8113

	- p-value: 0.4223

- FPR:

	- Test: t-test

	- Statistic: 4.7039

	- p-value: 0.0000

- FN_FP_ratio:

	- Test: t-test

	- Statistic: -1.0256

	- p-value: 0.3116


**Overall average accuracy**: 0.815


### 3. Assessing fairness for Support Vector Machine


Group: **1**

- TPR: 0.3490

- FPR: 0.0502

- FN_FP_ratio: 4.3095


Group: **2**

- TPR: 0.3334

- FPR: 0.0405

- FN_FP_ratio: 4.5481


#### Statistical Test Results:

- TPR:

	- Test: t-test

	- Statistic: 1.3244

	- p-value: 0.1933

- FPR:

	- Test: t-test

	- Statistic: 3.1773

	- p-value: 0.0030

- FN_FP_ratio:

	- Test: t-test

	- Statistic: -0.7151

	- p-value: 0.4789


**Overall average accuracy**: 0.820


### 4. Assessing fairness for K-Nearest Neighbors


Group: **1**

- TPR: 0.3644

- FPR: 0.0939

- FN_FP_ratio: 2.2357


Group: **2**

- TPR: 0.3586

- FPR: 0.0772

- FN_FP_ratio: 2.2158


#### Statistical Test Results:

- TPR:

	- Test: t-test

	- Statistic: 0.4569

	- p-value: 0.6504

- FPR:

	- Test: t-test

	- Statistic: 4.1443

	- p-value: 0.0002

- FN_FP_ratio:

	- Test: t-test

	- Statistic: 0.1321

	- p-value: 0.8956


**Overall average accuracy**: 0.794
## Dataset Bank


### 1. Assessing fairness for Logistic Regression


Group: **divorced**

- TPR: 0.0055

- FPR: 0.0011

- FN_FP_ratio: 7.3500


Group: **married**

- TPR: 0.0018

- FPR: 0.0009

- FN_FP_ratio: 64.1250


Group: **single**

- TPR: 0.0015

- FPR: 0.0011

- FN_FP_ratio: 48.3250


#### Statistical Test Results:

- TPR:

	- Test: ANOVA

	- Statistic: 1.3902

	- p-value: 0.2573

- FPR:

	- Test: ANOVA

	- Statistic: 0.1435

	- p-value: 0.8666

- FN_FP_ratio:

	- Test: ANOVA

	- Statistic: 8.9794

	- p-value: 0.0004

	- Post-hoc Tukey HSD Results:

```
Tukey's HSD Pairwise Group Comparisons (95.0% Confidence Interval)
Comparison  Statistic  p-value  Lower CI  Upper CI
 (0 - 1)    -56.775     0.000   -90.054   -23.496
 (0 - 2)    -40.975     0.012   -74.254    -7.696
 (1 - 0)     56.775     0.000    23.496    90.054
 (1 - 2)     15.800     0.492   -17.479    49.079
 (2 - 0)     40.975     0.012     7.696    74.254
 (2 - 1)    -15.800     0.492   -49.079    17.479


```


**Overall average accuracy**: 0.882


### 2. Assessing fairness for Random Forest


Group: **divorced**

- TPR: 0.1273

- FPR: 0.0394

- FN_FP_ratio: 3.2912


Group: **married**

- TPR: 0.1617

- FPR: 0.0476

- FN_FP_ratio: 2.0068


Group: **single**

- TPR: 0.1681

- FPR: 0.0653

- FN_FP_ratio: 2.3311


#### Statistical Test Results:

- TPR:

	- Test: ANOVA

	- Statistic: 6.2536

	- p-value: 0.0035

	- Post-hoc Tukey HSD Results:

```
Tukey's HSD Pairwise Group Comparisons (95.0% Confidence Interval)
Comparison  Statistic  p-value  Lower CI  Upper CI
 (0 - 1)     -0.034     0.020    -0.064    -0.005
 (0 - 2)     -0.041     0.005    -0.071    -0.011
 (1 - 0)      0.034     0.020     0.005     0.064
 (1 - 2)     -0.006     0.864    -0.036     0.023
 (2 - 0)      0.041     0.005     0.011     0.071
 (2 - 1)      0.006     0.864    -0.023     0.036


```

- FPR:

	- Test: ANOVA

	- Statistic: 38.1462

	- p-value: 0.0000

	- Post-hoc Tukey HSD Results:

```
Tukey's HSD Pairwise Group Comparisons (95.0% Confidence Interval)
Comparison  Statistic  p-value  Lower CI  Upper CI
 (0 - 1)     -0.008     0.025    -0.015    -0.001
 (0 - 2)     -0.026     0.000    -0.033    -0.019
 (1 - 0)      0.008     0.025     0.001     0.015
 (1 - 2)     -0.018     0.000    -0.025    -0.010
 (2 - 0)      0.026     0.000     0.019     0.033
 (2 - 1)      0.018     0.000     0.010     0.025


```

- FN_FP_ratio:

	- Test: ANOVA

	- Statistic: 10.2948

	- p-value: 0.0002

	- Post-hoc Tukey HSD Results:

```
Tukey's HSD Pairwise Group Comparisons (95.0% Confidence Interval)
Comparison  Statistic  p-value  Lower CI  Upper CI
 (0 - 1)      1.284     0.000     0.576     1.993
 (0 - 2)      0.960     0.005     0.252     1.669
 (1 - 0)     -1.284     0.000    -1.993    -0.576
 (1 - 2)     -0.324     0.517    -1.033     0.384
 (2 - 0)     -0.960     0.005    -1.669    -0.252
 (2 - 1)      0.324     0.517    -0.384     1.033


```


**Overall average accuracy**: 0.856


### 3. Assessing fairness for Support Vector Machine


Group: **divorced**

- TPR: 0.0082

- FPR: 0.0029

- FN_FP_ratio: 8.6083


Group: **married**

- TPR: 0.0154

- FPR: 0.0014

- FN_FP_ratio: 71.0275


Group: **single**

- TPR: 0.0010

- FPR: 0.0003

- FN_FP_ratio: 6.4250


#### Statistical Test Results:

- TPR:

	- Test: ANOVA

	- Statistic: 8.0164

	- p-value: 0.0009

	- Post-hoc Tukey HSD Results:

```
Tukey's HSD Pairwise Group Comparisons (95.0% Confidence Interval)
Comparison  Statistic  p-value  Lower CI  Upper CI
 (0 - 1)     -0.007     0.119    -0.016     0.001
 (0 - 2)      0.007     0.123    -0.001     0.016
 (1 - 0)      0.007     0.119    -0.001     0.016
 (1 - 2)      0.014     0.001     0.006     0.023
 (2 - 0)     -0.007     0.123    -0.016     0.001
 (2 - 1)     -0.014     0.001    -0.023    -0.006


```

- FPR:

	- Test: ANOVA

	- Statistic: 4.9766

	- p-value: 0.0102

	- Post-hoc Tukey HSD Results:

```
Tukey's HSD Pairwise Group Comparisons (95.0% Confidence Interval)
Comparison  Statistic  p-value  Lower CI  Upper CI
 (0 - 1)      0.001     0.199    -0.001     0.003
 (0 - 2)      0.003     0.007     0.001     0.005
 (1 - 0)     -0.001     0.199    -0.003     0.001
 (1 - 2)      0.001     0.343    -0.001     0.003
 (2 - 0)     -0.003     0.007    -0.005    -0.001
 (2 - 1)     -0.001     0.343    -0.003     0.001


```

- FN_FP_ratio:

	- Test: ANOVA

	- Statistic: 20.8280

	- p-value: 0.0000

	- Post-hoc Tukey HSD Results:

```
Tukey's HSD Pairwise Group Comparisons (95.0% Confidence Interval)
Comparison  Statistic  p-value  Lower CI  Upper CI
 (0 - 1)    -62.419     0.000   -89.774   -35.064
 (0 - 2)      2.183     0.980   -25.172    29.539
 (1 - 0)     62.419     0.000    35.064    89.774
 (1 - 2)     64.603     0.000    37.247    91.958
 (2 - 0)     -2.183     0.980   -29.539    25.172
 (2 - 1)    -64.603     0.000   -91.958   -37.247


```


**Overall average accuracy**: 0.883


### 4. Assessing fairness for K-Nearest Neighbors


Group: **divorced**

- TPR: 0.1048

- FPR: 0.0224

- FN_FP_ratio: 6.7439


Group: **married**

- TPR: 0.1294

- FPR: 0.0250

- FN_FP_ratio: 4.1000


Group: **single**

- TPR: 0.1066

- FPR: 0.0335

- FN_FP_ratio: 4.9838


#### Statistical Test Results:

- TPR:

	- Test: ANOVA

	- Statistic: 2.9535

	- p-value: 0.0602

- FPR:

	- Test: ANOVA

	- Statistic: 9.8601

	- p-value: 0.0002

	- Post-hoc Tukey HSD Results:

```
Tukey's HSD Pairwise Group Comparisons (95.0% Confidence Interval)
Comparison  Statistic  p-value  Lower CI  Upper CI
 (0 - 1)     -0.003     0.583    -0.009     0.004
 (0 - 2)     -0.011     0.000    -0.017    -0.005
 (1 - 0)      0.003     0.583    -0.004     0.009
 (1 - 2)     -0.009     0.005    -0.015    -0.002
 (2 - 0)      0.011     0.000     0.005     0.017
 (2 - 1)      0.009     0.005     0.002     0.015


```

- FN_FP_ratio:

	- Test: ANOVA

	- Statistic: 7.3630

	- p-value: 0.0014

	- Post-hoc Tukey HSD Results:

```
Tukey's HSD Pairwise Group Comparisons (95.0% Confidence Interval)
Comparison  Statistic  p-value  Lower CI  Upper CI
 (0 - 1)      2.644     0.001     0.956     4.332
 (0 - 2)      1.760     0.039     0.072     3.448
 (1 - 0)     -2.644     0.001    -4.332    -0.956
 (1 - 2)     -0.884     0.423    -2.572     0.804
 (2 - 0)     -1.760     0.039    -3.448    -0.072
 (2 - 1)      0.884     0.423    -0.804     2.572


```


**Overall average accuracy**: 0.873
