In [16]:
# import sys

import numpy as np
from warnings import warn

from aif360.datasets import GermanDataset
from aif360.metrics import ClassificationMetric, BinaryLabelDatasetMetric
from common_utils import compute_metrics
from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions\
        import load_preproc_data_german
from sklearn.preprocessing import MaxAbsScaler
from aif360.algorithms.inprocessing import MetaFairClassifier


from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

from IPython.display import Markdown, display

In [39]:
privileged_groups = [{'age': 1}]
unprivileged_groups = [{'age': 0}]
dataset_orig = load_preproc_data_german(['age'])

In [52]:
# print out some labels, names, etc.
display(Markdown("#### Training Dataset shape"))
print(dataset_orig.features.shape)
display(Markdown("#### Favorable and unfavorable labels"))
print(dataset_orig.favorable_label, dataset_orig.unfavorable_label)
display(Markdown("#### Protected attribute names"))
print(dataset_orig.protected_attribute_names)
display(Markdown("#### Privileged and unprivileged protected attribute values"))
print(dataset_orig.privileged_protected_attributes, 
      dataset_orig.unprivileged_protected_attributes)
display(Markdown("#### Dataset feature names"))
print(dataset_orig.feature_names)

#### Training Dataset shape

(1000, 11)


#### Favorable and unfavorable labels

1.0 2.0


#### Protected attribute names

['age']


#### Privileged and unprivileged protected attribute values

[array([1.])] [array([0.])]


#### Dataset feature names

['age', 'sex', 'credit_history=Delay', 'credit_history=None/Paid', 'credit_history=Other', 'savings=500+', 'savings=<500', 'savings=Unknown/None', 'employment=1-4 years', 'employment=4+ years', 'employment=Unemployed']


In [71]:
# split data into epochs, each with a different group of agents
NUM_EPOCHS = 5
dataset_orig_epochs = dataset_orig.split(NUM_EPOCHS, shuffle=True)

In [72]:
for epoch in range(NUM_EPOCHS):
    print(f"\nEpoch {epoch+1}/{NUM_EPOCHS}")

    dataset_epoch = dataset_orig_epochs[epoch]

    # split each data epoch into train and test
    dataset_orig_train, dataset_orig_vt = dataset_epoch.split([0.7], shuffle=True)
    dataset_orig_valid, dataset_orig_test = dataset_orig_vt.split([0.5], shuffle=True)
     
    # print out some labels, names, etc.
    display(Markdown("#### Training Dataset shape"))
    print(dataset_orig_train.features.shape)
    print(dataset_orig_valid.features.shape)
    
    # Logistic regression classifier and predictions
    scale_orig = StandardScaler()

    # Train
    X_train = scale_orig.fit_transform(dataset_orig_train.features)
    y_train = dataset_orig_train.labels.ravel()
    lmod = LogisticRegression(solver='liblinear')  # Solver specified to avoid future warnings
    lmod.fit(X_train, y_train)

    # Predict training data
    y_train_pred = lmod.predict(X_train)
    dataset_orig_train_pred = dataset_orig_train.copy(deepcopy=True)
    dataset_orig_train_pred.labels = y_train_pred

    # indices of favorable label
    pos_ind = np.where(lmod.classes_ == dataset_orig_train.favorable_label)[0][0]

    # VALIDATION SET
    dataset_orig_valid_pred = dataset_orig_valid.copy(deepcopy=True)
    X_valid = scale_orig.transform(dataset_orig_valid_pred.features)
    y_valid = dataset_orig_valid_pred.labels
    dataset_orig_valid_pred.scores = lmod.predict_proba(X_valid)[:,pos_ind].reshape(-1,1)

    # Predict test data
    dataset_orig_test_pred = dataset_orig_test.copy(deepcopy=True)
    X_test = scale_orig.transform(dataset_orig_test_pred.features)
    y_test_pred = lmod.predict_proba(X_test)
    dataset_orig_test_pred.scores = y_test_pred[:, pos_ind].reshape(-1, 1)

    # Evaluate fairness metrics
    metric_train = BinaryLabelDatasetMetric(dataset_orig_train_pred, 
                                            unprivileged_groups=unprivileged_groups,
                                            privileged_groups=privileged_groups)
    metric_test = BinaryLabelDatasetMetric(dataset_orig_test_pred, 
                                           unprivileged_groups=unprivileged_groups,
                                           privileged_groups=privileged_groups)

    print("Training set: Difference in mean outcomes = {:.3f}".format(metric_train.mean_difference()))
    print("Test set: Difference in mean outcomes = {:.3f}".format(metric_test.mean_difference()))

    # NO FAIRNESS: find best classification threshold

    num_thresh = 100
    ba_arr = np.zeros(num_thresh)
    class_thresh_arr = np.linspace(0.01, 0.99, num_thresh)
    for idx, class_thresh in enumerate(class_thresh_arr):
        
        fav_inds = dataset_orig_valid_pred.scores > class_thresh
        dataset_orig_valid_pred.labels[fav_inds] = dataset_orig_valid_pred.favorable_label
        dataset_orig_valid_pred.labels[~fav_inds] = dataset_orig_valid_pred.unfavorable_label
        
        classified_metric_orig_valid = ClassificationMetric(dataset_orig_valid,
                                                dataset_orig_valid_pred, 
                                                unprivileged_groups=unprivileged_groups,
                                                privileged_groups=privileged_groups)
        
        ba_arr[idx] = 0.5*(classified_metric_orig_valid.true_positive_rate()\
                        +classified_metric_orig_valid.true_negative_rate())

    best_ind = np.where(ba_arr == np.max(ba_arr))[0][0]
    best_class_thresh = class_thresh_arr[best_ind]

    print("Best balanced accuracy (no fairness constraints) = %.4f" % np.max(ba_arr))
    print("Optimal classification threshold (no fairness constraints) = %.4f" % best_class_thresh)

    # Metrics for the test set
    fav_inds = dataset_orig_test_pred.scores > best_class_thresh
    dataset_orig_test_pred.labels[fav_inds] = dataset_orig_test_pred.favorable_label
    dataset_orig_test_pred.labels[~fav_inds] = dataset_orig_test_pred.unfavorable_label

    display(Markdown("#### Test set"))
    display(Markdown("##### Raw predictions - No fairness constraints, only maximizing balanced accuracy"))

    metric_test_bef = compute_metrics(dataset_orig_test, dataset_orig_test_pred, 
                    unprivileged_groups, privileged_groups)


Epoch 1/5


#### Training Dataset shape

(140, 11)
(30, 11)
Training set: Difference in mean outcomes = -0.063
Test set: Difference in mean outcomes = -0.458
Best balanced accuracy (no fairness constraints) = 0.5682
Optimal classification threshold (no fairness constraints) = 0.8712


#### Test set

##### Raw predictions - No fairness constraints, only maximizing balanced accuracy

Balanced accuracy = 0.5714
Statistical parity difference = -0.1250
Disparate impact = 0.0000
Average odds difference = -0.0789
Equal opportunity difference = -0.1579
Theil index = 0.9163

Epoch 2/5


#### Training Dataset shape

(140, 11)
(30, 11)
Training set: Difference in mean outcomes = -0.458
Test set: Difference in mean outcomes = -0.057
Best balanced accuracy (no fairness constraints) = 0.6136
Optimal classification threshold (no fairness constraints) = 0.8613


#### Test set

##### Raw predictions - No fairness constraints, only maximizing balanced accuracy

Balanced accuracy = 0.5750
Statistical parity difference = -0.2727
Disparate impact = 0.0000
Average odds difference = -0.2381
Equal opportunity difference = -0.3333
Theil index = 0.7153

Epoch 3/5


#### Training Dataset shape

(140, 11)
(30, 11)
Training set: Difference in mean outcomes = 0.108
Test set: Difference in mean outcomes = -0.370
Best balanced accuracy (no fairness constraints) = 0.6242
Optimal classification threshold (no fairness constraints) = 0.6732


#### Test set

##### Raw predictions - No fairness constraints, only maximizing balanced accuracy

Balanced accuracy = 0.5250
Statistical parity difference = 0.0000
Disparate impact = 1.0000
Average odds difference = -0.0592
Equal opportunity difference = -0.3684
Theil index = 0.6134

Epoch 4/5


#### Training Dataset shape

(140, 11)
(30, 11)
Training set: Difference in mean outcomes = 0.018
Test set: Difference in mean outcomes = -0.081
Best balanced accuracy (no fairness constraints) = 0.7460
Optimal classification threshold (no fairness constraints) = 0.6633


#### Test set

##### Raw predictions - No fairness constraints, only maximizing balanced accuracy

Balanced accuracy = 0.6411
Statistical parity difference = -0.2671
Disparate impact = 0.6161
Average odds difference = -0.2333
Equal opportunity difference = -0.3000
Theil index = 0.2310

Epoch 5/5


#### Training Dataset shape

(140, 11)
(30, 11)
Training set: Difference in mean outcomes = -0.407
Test set: Difference in mean outcomes = -0.074
Best balanced accuracy (no fairness constraints) = 0.6111
Optimal classification threshold (no fairness constraints) = 0.5445


#### Test set

##### Raw predictions - No fairness constraints, only maximizing balanced accuracy

Balanced accuracy = 0.5739
Statistical parity difference = -0.4444
Disparate impact = 0.4286
Average odds difference = -0.5071
Equal opportunity difference = -0.3000
Theil index = 0.2310
