In [73]:
# Load all necessary packages
import sys
sys.path.insert(1, "../")  

import numpy as np
np.random.seed(0)

from aif360.datasets import CompasDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.metrics import ClassificationMetric
from aif360.algorithms.preprocessing import Reweighing

# Scalers
from sklearn.preprocessing import StandardScaler

# Classifiers
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

from IPython.display import Markdown, display

In [None]:
from pathlib import Path

import pandas as pd

import aif360

# Locate the raw COMPAS CSV inside whichever aif360 package is importable,
# rather than hard-coding one user's absolute conda-environment path, so the
# cell runs on any machine where aif360 (and its raw data file) is available.
compas_csv_path = (Path(aif360.__file__).parent
                   / 'data' / 'raw' / 'compas' / 'compas-scores-two-years.csv')
df = pd.read_csv(compas_csv_path)

# Quick look at the raw columns and at the distribution of the `sex` column.
df.info()
print(df['sex'].value_counts())

In [None]:
# Group definitions (encoded `sex` values) passed to the aif360 metric classes.
unprivileged_groups = [{'sex': 0}]
privileged_groups = [{'sex': 1}]

# COMPAS with `sex` as the only protected attribute; `race` and `age` are
# dropped from the feature set.
dataset_orig = CompasDataset(protected_attribute_names=['sex'],
                             privileged_classes=[['Female']],
                             features_to_drop=['race', 'age'])

# Shuffled three-way split using the split points 0.5 and 0.8.
(dataset_orig_train,
 dataset_orig_val,
 dataset_orig_test) = dataset_orig.split([0.5, 0.8], shuffle=True)

In [66]:
def describe(train=None, val=None, test=None):
    """Display a short summary of the given dataset splits.

    Every argument is optional; the section for a split that is ``None`` is
    skipped entirely.

    Args:
        train: Training split; only the shape of its ``features`` is shown.
        val: Validation split; only the shape of its ``features`` is shown.
        test: Test split; the shape of its ``features`` is shown together
            with its favorable/unfavorable labels, protected attribute names
            and privileged/unprivileged protected attribute values.

    Returns:
        None. Output is rendered with ``display``/``print``.
    """
    if train is not None:
        display(Markdown("#### Training Dataset shape"))
        print(train.features.shape)
    if val is not None:
        display(Markdown("#### Validation Dataset shape"))
        print(val.features.shape)

    # `test` defaults to None just like the other splits, so it needs the same
    # guard; otherwise `describe()` / `describe(train=...)` raises
    # AttributeError on `None.features`.
    if test is None:
        return

    display(Markdown("#### Test Dataset shape"))
    print(test.features.shape)
    display(Markdown("#### Favorable and unfavorable labels"))
    print(test.favorable_label, test.unfavorable_label)
    display(Markdown("#### Protected attribute names"))
    print(test.protected_attribute_names)
    display(Markdown("#### Privileged and unprivileged protected attribute values"))
    print(test.privileged_protected_attributes,
          test.unprivileged_protected_attributes)

In [None]:
# Summarise the three splits (arguments are train, val, test in that order).
describe(dataset_orig_train, dataset_orig_val, dataset_orig_test)

In [None]:
# Dataset-level fairness metrics of the original (unmitigated) training split.
metric_orig_train = BinaryLabelDatasetMetric(dataset_orig_train,
                                             privileged_groups=privileged_groups,
                                             unprivileged_groups=unprivileged_groups)

# No `analyze` helper is defined in any preceding cell, so calling it fails
# with NameError on a fresh kernel. Report the headline metrics directly.
print("Disparate impact: {:6.4f}".format(metric_orig_train.disparate_impact()))
print("Statistical parity difference: {:6.4f}".format(
    metric_orig_train.statistical_parity_difference()))
# TODO: Add explainer

In [64]:
# Logistic-regression baseline trained on the original training split.
dataset = dataset_orig_train

# Pass the dataset's instance weights to the pipeline's logistic-regression step.
fit_params = {'logisticregression__sample_weight': dataset.instance_weights}

model = make_pipeline(
    StandardScaler(),
    LogisticRegression(solver='liblinear', random_state=1),
)

train_features = dataset.features
train_labels = dataset.labels.ravel()  # flattened to 1-D before fitting
lr_orig_panel = model.fit(train_features, train_labels, **fit_params)

In [63]:
from collections import defaultdict

def test(dataset, model, thresh_arr):
    try:
        # sklearn classifier
        y_val_pred_prob = model.predict_proba(dataset.features)
        pos_ind = np.where(model.classes_ == dataset.favorable_label)[0][0]
    except AttributeError:
        # aif360 inprocessing algorithm
        y_val_pred_prob = model.predict(dataset).scores
        pos_ind = 0
    
    metric_arrs = defaultdict(list)
    for thresh in thresh_arr:
        y_val_pred = (y_val_pred_prob[:, pos_ind] > thresh).astype(np.float64)

        dataset_pred = dataset.copy()
        dataset_pred.labels = y_val_pred
        metric = ClassificationMetric(
                dataset, dataset_pred,
                unprivileged_groups=unprivileged_groups,
                privileged_groups=privileged_groups)
        metric_arrs['bal_acc'].append((metric.true_positive_rate()
                                     + metric.true_negative_rate()) / 2)
        metric_arrs['avg_odds_diff'].append(metric.average_odds_difference())
        metric_arrs['disp_imp'].append(metric.disparate_impact())
        metric_arrs['stat_par_diff'].append(metric.statistical_parity_difference())
        metric_arrs['eq_opp_diff'].append(metric.equal_opportunity_difference())
        metric_arrs['theil_ind'].append(metric.theil_index())
    
    return metric_arrs

In [65]:
# Sweep 50 evenly spaced thresholds from 0.01 to 0.5 on the validation split
# and remember the index with the highest balanced accuracy.
thresh_arr = np.linspace(start=0.01, stop=0.5, num=50)
val_metrics = test(dataset_orig_val, lr_orig_panel, thresh_arr)
lr_orig_best_ind = np.argmax(val_metrics['bal_acc'])

In [68]:
def describe_metrics(metrics, thresh_arr):
    """Print the metrics at the threshold with the best balanced accuracy.

    Args:
        metrics: Mapping with the keys ``bal_acc``, ``disp_imp``,
            ``avg_odds_diff``, ``stat_par_diff``, ``eq_opp_diff`` and
            ``theil_ind``; each value is a sequence indexed like
            ``thresh_arr``.
        thresh_arr: Sequence of thresholds the metrics were computed at.

    Returns:
        None. The summary is written with ``print``.
    """
    best_ind = np.argmax(metrics['bal_acc'])
    print("Threshold corresponding to Best balanced accuracy: {:6.4f}".format(thresh_arr[best_ind]))
    print("Best balanced accuracy: {:6.4f}".format(metrics['bal_acc'][best_ind]))

    disp_imp = metrics['disp_imp'][best_ind]
    if disp_imp == 0:
        # min(0, 1/0) is 0 in the limit, so the gap is exactly 1. Handling it
        # explicitly avoids ZeroDivisionError for Python floats and the
        # divide-by-zero RuntimeWarning for numpy scalars.
        disp_imp_at_best_ind = 1.0
    else:
        # A NaN disparate impact propagates to a NaN gap here.
        disp_imp_at_best_ind = 1 - min(disp_imp, 1 / disp_imp)
    print("Corresponding 1-min(DI, 1/DI) value: {:6.4f}".format(disp_imp_at_best_ind))

    print("Corresponding average odds difference value: {:6.4f}".format(metrics['avg_odds_diff'][best_ind]))
    print("Corresponding statistical parity difference value: {:6.4f}".format(metrics['stat_par_diff'][best_ind]))
    print("Corresponding equal opportunity difference value: {:6.4f}".format(metrics['eq_opp_diff'][best_ind]))
    print("Corresponding Theil index value: {:6.4f}".format(metrics['theil_ind'][best_ind]))

In [69]:
# Report the validation metrics at the best-balanced-accuracy threshold.
describe_metrics(metrics=val_metrics, thresh_arr=thresh_arr)

Threshold corresponding to Best balanced accuracy: 0.0100
Best balanced accuracy: 0.4925
Corresponding 1-min(DI, 1/DI) value: 0.4367
Corresponding average odds difference value: 0.0057
Corresponding statistical parity difference value: 0.0113
Corresponding equal opportunity difference value: 0.0164
Corresponding Theil index value: 0.7761


In [70]:
# Evaluate the fitted logistic-regression pipeline on the held-out test split,
# using only the threshold selected on the validation split.
# (The fitted model is `lr_orig_panel`; `lr_orig_panel19` is never defined.)
lr_orig_metrics = test(dataset=dataset_orig_test,
                       model=lr_orig_panel,
                       thresh_arr=[thresh_arr[lr_orig_best_ind]])

In [85]:
# Report the logistic-regression test metrics at the validation-selected threshold.
describe_metrics(metrics=lr_orig_metrics,
                 thresh_arr=[thresh_arr[lr_orig_best_ind]])

Threshold corresponding to Best balanced accuracy: 0.0100
Best balanced accuracy: 0.4946
Corresponding 1-min(DI, 1/DI) value: 0.5989
Corresponding average odds difference value: 0.0105
Corresponding statistical parity difference value: 0.0131
Corresponding equal opportunity difference value: 0.0185
Corresponding Theil index value: 0.7974


In [None]:
# Random-forest baseline trained on the original training split.
dataset = dataset_orig_train
# Pin random_state (as the logistic-regression pipeline does) so re-running
# this cell rebuilds the same forest instead of depending on how far the
# global NumPy RNG has advanced.
model_rf = make_pipeline(StandardScaler(),
                         RandomForestClassifier(n_estimators=500,
                                                min_samples_leaf=25,
                                                random_state=1))
# Pass the dataset's instance weights to the pipeline's random-forest step.
fit_params = {'randomforestclassifier__sample_weight': dataset.instance_weights}
rf_orig_panel = model_rf.fit(dataset.features, dataset.labels.ravel(), **fit_params)

In [82]:
# Sweep 50 evenly spaced thresholds from 0.01 to 0.5 for the random forest on
# the validation split and remember the best-balanced-accuracy index.
thresh_arr_rf = np.linspace(start=0.01, stop=0.5, num=50)
val_metrics_rf = test(dataset_orig_val, rf_orig_panel, thresh_arr_rf)
rf_orig_best_ind = np.argmax(val_metrics_rf['bal_acc'])

invalid value encountered in double_scalars
divide by zero encountered in double_scalars


In [83]:
# Report the random-forest validation metrics at the best threshold.
describe_metrics(metrics=val_metrics_rf, thresh_arr=thresh_arr_rf)

Threshold corresponding to Best balanced accuracy: 0.0100
Best balanced accuracy: 0.5000
Corresponding 1-min(DI, 1/DI) value:    nan
Corresponding average odds difference value: 0.0000
Corresponding statistical parity difference value: 0.0000
Corresponding equal opportunity difference value: 0.0000
Corresponding Theil index value: 0.7848


In [84]:
# Evaluate the random forest on the held-out test split, using only the
# threshold selected on the validation split.
rf_best_thresh = thresh_arr_rf[rf_orig_best_ind]
rf_orig_metrics = test(dataset_orig_test, rf_orig_panel, [rf_best_thresh])

invalid value encountered in double_scalars
