In [3]:
# import sys

import numpy as np
import pandas as pd
from warnings import warn

from aif360.datasets import GermanDataset, StandardDataset
from aif360.metrics import ClassificationMetric, BinaryLabelDatasetMetric
from common_utils import compute_metrics
# from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions\
        # import load_preproc_data_german
from data_preproc_functions import load_preproc_data_german
from sklearn.preprocessing import MaxAbsScaler
from aif360.algorithms.inprocessing import MetaFairClassifier
from aif360.algorithms.postprocessing import RejectOptionClassification


from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import accuracy_score

from IPython.display import Markdown, display

2024-05-01 11:04:29.278471: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  warn_deprecated('vmap', 'torch.vmap')


In [4]:
def load_dropped_data_german(protected_attributes=None):
    dataset = load_preproc_data_german()
    df = dataset.convert_to_dataframe()[0]
    df = df[['age','credit_history=Other','savings=<500','credit']]

    # Feature partitions
    # XD_features = ['credit_history', 'savings', 'employment', 'sex', 'age']
    D_features = ['sex', 'age'] if protected_attributes is None else protected_attributes
    Y_features = ['credit']
    # X_features = list(set(XD_features)-set(D_features))
    # print(X_features)
    categorical_features = ['credit_history', 'savings', 'employment']

    # privileged classes
    all_privileged_classes = {"sex": [1.0],
                              "age": [1.0]}

    # protected attribute maps
    all_protected_attribute_maps = {"sex": {1.0: 'Male', 0.0: 'Female'},
                                    "age": {1.0: 'Old', 0.0: 'Young'}}
    new_german_data = StandardDataset(
            df=df,
            label_name=Y_features[0],
            favorable_classes=[1],
            protected_attribute_names=D_features,
            privileged_classes=[all_privileged_classes[x] for x in D_features],
            instance_weights_name=None,
            features_to_keep=['credit_history=Other','savings=<500']+Y_features+D_features,
            metadata={ 'label_maps': [{1.0: 'Good Credit', 2.0: 'Bad Credit'}],
                    'protected_attribute_maps': [all_protected_attribute_maps[x]
                                    for x in D_features]})
    
    return new_german_data

In [5]:
privileged_groups = [{'age': 1}]
unprivileged_groups = [{'age': 0}]

In [6]:
# DROPPED DATA
dataset_orig = load_dropped_data_german(['age'])

# print out some labels, names, etc.
display(Markdown("#### Training Dataset shape"))
print(dataset_orig.features.shape)
display(Markdown("#### Favorable and unfavorable labels"))
print(dataset_orig.favorable_label, dataset_orig.unfavorable_label)
display(Markdown("#### Protected attribute names"))
print(dataset_orig.protected_attribute_names)
display(Markdown("#### Privileged and unprivileged protected attribute values"))
print(dataset_orig.privileged_protected_attributes, 
      dataset_orig.unprivileged_protected_attributes)
display(Markdown("#### Dataset feature names"))
print(dataset_orig.feature_names)
display(Markdown("#### Dataset label"))
print(dataset_orig.label_names)

['employment', 'savings', 'credit_history']
0      A34
1      A32
2      A34
3      A32
4      A33
      ... 
995    A32
996    A32
997    A32
998    A32
999    A34
Name: credit_history, Length: 1000, dtype: object
0          Other
1      None/Paid
2          Other
3      None/Paid
4          Delay
         ...    
995    None/Paid
996    None/Paid
997    None/Paid
998    None/Paid
999        Other
Name: credit_history, Length: 1000, dtype: object


#### Training Dataset shape

(1000, 3)


#### Favorable and unfavorable labels

1.0 2.0


#### Protected attribute names

['age']


#### Privileged and unprivileged protected attribute values

[array([1.])] [array([0.])]


#### Dataset feature names

['age', 'credit_history=Other', 'savings=<500']


#### Dataset label

['credit']


In [7]:
# split data into epochs, each with a different group of agents
NUM_EPOCHS = 2
dataset_orig_epochs = dataset_orig.split(NUM_EPOCHS, shuffle=True)
print("Size of epoch: ", dataset_orig_epochs[0].features.shape)

Size of epoch:  (500, 3)


In [8]:
# NO FAIRNESS
# takes two epochs of data
# trains on epoch_1, then uses model to classify epoch 2

def no_fairness_train_and_classify(data_epoch_1, data_epoch_2):
    # print out some labels, names, etc.
    display(Markdown("#### No Fairness"))
    print("Epoch 1: ", data_epoch_1.features.shape)
    print("Epoch 2: ",data_epoch_2.features.shape)

    # train classifier on epoch 1
    scale_orig = StandardScaler()
    X_train = scale_orig.fit_transform(data_epoch_1.features)
    y_train = data_epoch_1.labels.ravel()
    lmod = LogisticRegression(solver='liblinear')  # Solver specified to avoid future warnings
    lmod.fit(X_train, y_train)

    # classify epoch 2 agents
    X_epoch2 = scale_orig.fit_transform(data_epoch_2.features)
    y_epoch2_pred = lmod.predict(X_epoch2)
    data_epoch_2_pred = data_epoch_2.copy(deepcopy=True)
    data_epoch_2_pred.labels = y_epoch2_pred

    # print("Classifications: ", data_epoch_2_pred.labels)

    # Evaluate fairness metrics on classification of epoch 2
    metric_train = BinaryLabelDatasetMetric(data_epoch_2_pred, 
                                            unprivileged_groups=unprivileged_groups,
                                            privileged_groups=privileged_groups)
    
    print("Training set: Difference in mean outcomes = {:.3f}".format(metric_train.mean_difference()))

    metric_test_aft = compute_metrics(data_epoch_2, data_epoch_2_pred, 
                unprivileged_groups, privileged_groups)
    
    # The estimated coefficients will all be around 1:
    print(lmod.coef_)
    print(data_epoch_1.feature_names)

    return lmod

In [9]:
def ROC_train_and_classify(data_epoch_1, data_epoch_2):
    # print out some labels, names, etc.
    display(Markdown("#### ROC Fairness"))
    print("Epoch 1: ", data_epoch_1.features.shape)
    print("Epoch 2: ",data_epoch_2.features.shape)

    # Metric used (should be one of allowed_metrics)
    metric_name = "Statistical parity difference"

    # Upper and lower bound on the fairness metric used
    metric_ub = 0.05
    metric_lb = -0.05

    scale_orig = StandardScaler()

    # need to first train a model to get predicted scores
    X_train = scale_orig.fit_transform(data_epoch_1.features)
    y_train = data_epoch_1.labels.ravel()
    lmod = LogisticRegression(solver='liblinear')  # Solver specified to avoid future warnings
    lmod.fit(X_train, y_train)

    # indices of favorable label
    pos_ind = np.where(lmod.classes_ == data_epoch_1.favorable_label)[0][0]

    # data_epoch_1_pred contains PREDICTED SCORES
    # use same epoch 1 data instead of separate validation
    data_epoch_1_pred = data_epoch_1.copy(deepcopy=True)
    X_train = scale_orig.transform(data_epoch_1_pred.features)
    data_epoch_1_pred.scores = lmod.predict_proba(X_train)[:,pos_ind].reshape(-1,1)

    ROC = RejectOptionClassification(unprivileged_groups=unprivileged_groups, 
                                 privileged_groups=privileged_groups, 
                                 low_class_thresh=0.01, high_class_thresh=0.99,
                                  num_class_thresh=100, num_ROC_margin=50,
                                  metric_name=metric_name,
                                  metric_ub=metric_ub, metric_lb=metric_lb)
    ROC = ROC.fit(data_epoch_1, data_epoch_1_pred)

    print("Optimal classification threshold (with fairness constraints) = %.4f" % ROC.classification_threshold)
    print("Optimal ROC margin = %.4f" % ROC.ROC_margin)

    # Metrics for the transformed test set
    data_epoch_2_pred = ROC.predict(data_epoch_2)

    # Evaluate fairness metrics on classification of epoch 2
    metric_train = BinaryLabelDatasetMetric(data_epoch_2_pred, 
                                            unprivileged_groups=unprivileged_groups,
                                            privileged_groups=privileged_groups)
    
    print("Training set: Difference in mean outcomes = {:.3f}".format(metric_train.mean_difference()))

    metric_test_aft = compute_metrics(data_epoch_2, data_epoch_2_pred, 
                unprivileged_groups, privileged_groups)
    
    return lmod

In [10]:
no_fairness_train_and_classify(dataset_orig_epochs[0],dataset_orig_epochs[1])

#### No Fairness

Epoch 1:  (500, 3)
Epoch 2:  (500, 3)
Training set: Difference in mean outcomes = -0.559
Balanced accuracy = 0.5391
Statistical parity difference = -0.5591
Disparate impact = 0.4409
Average odds difference = -0.5840
Equal opportunity difference = -0.4915
Theil index = 0.1144
[[-0.26371839 -0.43193997  0.39963863]]
['age', 'credit_history=Other', 'savings=<500']


In [11]:
ROC_train_and_classify(dataset_orig_epochs[0],dataset_orig_epochs[1])

#### ROC Fairness

Epoch 1:  (500, 3)
Epoch 2:  (500, 3)
Optimal classification threshold (with fairness constraints) = 0.6336
Optimal ROC margin = 0.1645
Training set: Difference in mean outcomes = -0.095
Balanced accuracy = 1.0000
Statistical parity difference = -0.0953
Disparate impact = 0.8694
Average odds difference = 0.0000
Equal opportunity difference = 0.0000
Theil index = 0.0000


In [12]:
def append_dataset_history(dataset_history, new_epoch, protected_attributes=None):
    history_df = dataset_history.convert_to_dataframe()[0]
    epoch_df = new_epoch.convert_to_dataframe()[0]

    history_df = pd.concat([history_df,epoch_df])

    # CONVERT DATASET
    D_features = ['sex', 'age'] if protected_attributes is None else protected_attributes
    Y_features = ['credit']
    # X_features = list(set(XD_features)-set(D_features))
    # print(X_features)
    categorical_features = ['credit_history', 'savings', 'employment']

    # privileged classes
    all_privileged_classes = {"sex": [1.0],
                              "age": [1.0]}

    # protected attribute maps
    all_protected_attribute_maps = {"sex": {1.0: 'Male', 0.0: 'Female'},
                                    "age": {1.0: 'Old', 0.0: 'Young'}}
    result = StandardDataset(
            df=history_df,
            label_name=Y_features[0],
            favorable_classes=[1],
            protected_attribute_names=D_features,
            privileged_classes=[all_privileged_classes[x] for x in D_features],
            instance_weights_name=None,
            features_to_keep=['credit_history=Other','savings=<500']+Y_features+D_features,
            metadata={ 'label_maps': [{1.0: 'Good Credit', 2.0: 'Bad Credit'}],
                    'protected_attribute_maps': [all_protected_attribute_maps[x]
                                    for x in D_features]})

    aligned_dataset = dataset_history.align_datasets(result)

    return aligned_dataset

In [13]:
# TEST THAT THE APPENDING WORKS

history_data = dataset_orig_epochs[0]
for epoch_ind in range(1,len(dataset_orig_epochs)):
    print("Size of epoch: ", dataset_orig_epochs[epoch_ind].features.shape)
    history_data = append_dataset_history(history_data,dataset_orig_epochs[epoch_ind], ["age"])
    print("Size of history: ", history_data.features.shape)

Size of epoch:  (500, 3)
Size of history:  (1000, 3)


In [17]:
ADV_MOD_PROB_MEAN = 0.8
ADV_MOD_PROB_STD = 0.1
STD_MOD_PROB_MEAN = 0.4
STD_MOD_PROB_STD = 0.2
MOD_THRESHOLD = 0

def strategizing_agents(data_history, data_epoch, model):
    # advantaged agents have higher distribution of flipping coin to modify
    # if successful, can choose which feature to modify

    # get scaled data from agent
    scale_orig = StandardScaler()
    X_train = scale_orig.fit_transform(data_epoch.features)
    y_train = data_epoch.labels.ravel()

    bad_credit_count = 0
    mod_count = 0
    new_classication_count = 0
    adv_agent_new_classificaton_count = 0
    adv_agent_count = 0

    for agent_ind in range(data_epoch.features.shape[0]):
        print(f"Agent {agent_ind}")
        print(X_train[agent_ind])
        if X_train[agent_ind][0] > 0:
            adv_agent_count +=1

        # penalize everyone by the meaningful feature (credit history)
        # subtract by a standard deviation
        X_train[agent_ind][1] -= 3*scale_orig.scale_[1]
        
        agent_classification = model.predict([X_train[agent_ind]])
        print("classify: ",agent_classification)
        if agent_classification == 2: # bad credit
            bad_credit_count+=1
            print("bad credit")
            # under 25, disadvantaged
            if data_epoch.unprivileged_protected_attributes[0][0] == 0: 
                prob_change_feature = np.random.normal(STD_MOD_PROB_MEAN,STD_MOD_PROB_STD)
            else: 
                prob_change_feature = np.random.normal(ADV_MOD_PROB_MEAN,ADV_MOD_PROB_STD)
        else:
            prob_change_feature = 0
        if prob_change_feature > MOD_THRESHOLD:
            mod_count += 1
            for feature_ind in range(len(data_history.feature_names)):
                print(data_history.feature_names[feature_ind])
                if data_history.feature_names[feature_ind] is not data_history.protected_attribute_names[0]:
                    if X_train[agent_ind][feature_ind] < 0:
                        new_train = list(X_train[agent_ind])
                        new_train[feature_ind] = 1*scale_orig.scale_[feature_ind]
                        new_classify = model.predict([new_train])
                        if new_classify is not agent_classification:
                            new_classication_count+=1
                            if X_train[agent_ind][0] > 0:
                                adv_agent_new_classificaton_count+=1
                        print(new_classify)
                        print(new_train)
    

    print("adv_agent_count", adv_agent_count)
    print("bad_credit_count: ",bad_credit_count)
    print("mod_count: ", mod_count)
    print("new_classication_count: ", new_classication_count)
    print("adv_agent_new_classificaton_count: ",adv_agent_new_classificaton_count)
                    


In [18]:
lmod = no_fairness_train_and_classify(dataset_orig_epochs[0],dataset_orig_epochs[1])

strategizing_agents(dataset_orig_epochs[0],dataset_orig_epochs[1],lmod)

#### No Fairness

Epoch 1:  (500, 3)
Epoch 2:  (500, 3)
Training set: Difference in mean outcomes = -0.559
Balanced accuracy = 0.5391
Statistical parity difference = -0.5591
Disparate impact = 0.4409
Average odds difference = -0.5840
Equal opportunity difference = -0.4915
Theil index = 0.1144
[[-0.26371839 -0.43193997  0.39963863]]
['age', 'credit_history=Other', 'savings=<500']
Agent 0
[-2.09197134 -0.63289827 -1.47124334]
classify:  [1.]
Agent 1
[-2.09197134 -0.63289827  0.67969722]
classify:  [2.]
bad credit
modification
age
credit_history=Other
[1.]
[-2.0919713396749775, 0.4518893669915219, 0.6796972150108942]
savings=<500
Agent 2
[ 0.47801802 -0.63289827  0.67969722]
classify:  [2.]
bad credit
modification
age
credit_history=Other
[1.]
[0.4780180211050933, 0.4518893669915219, 0.6796972150108942]
savings=<500
Agent 3
[ 0.47801802 -0.63289827  0.67969722]
classify:  [2.]
bad credit
modification
age
credit_history=Other
[1.]
[0.4780180211050933, 0.4518893669915219, 0.6796972150108942]
savings=<500
Age