In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from os import path
from sklearn.metrics import balanced_accuracy_score, roc_auc_score
import sklearn.preprocessing
from sklearn import preprocessing
from sklearn.preprocessing import Normalizer
import src.lib.utility_classfier as uclf
import src.lib.optimal_threhold_related as thres
import src.lib.fairness_tests as fair

In [2]:
# Read the whole dataset from a CSV file into the module-level frame `df`,
# which the following cells split into features and labels.
# NOTE(review): this is an absolute path on one specific machine; it has to be
# edited (or replaced by a configurable data directory) to run anywhere else.
data_path='/Users/lifuchen/Desktop/research/data.csv'
df = pd.read_csv(data_path)

In [3]:
# Labels: the raw array behind the 'Class' column.
y = df['Class'].values
# Features: every remaining column once the 'GRID' and 'Class' columns are removed.
X = df.drop(columns=['GRID', 'Class'])
# Last expression of the cell, so the notebook displays (rows, columns).
X.shape

(109490, 87)

In [4]:
def get_result (classifier, records, X_train_scaled, y_train, X_val_scaled, y_val, X_test_scaled, y_test, X_val_white_scaled, y_val_white, X_test_white_scaled, y_test_white, X_val_black_scaled, y_val_black, X_test_black_scaled, y_test_black):
    """Evaluate one classifier overall and per group, and append one result row.

    ``classifier`` is the name of a function looked up on ``uclf``. That
    function is called as ``f(X_train, y_train, X_eval, y_eval)`` and its
    return value is used as the scores for the evaluation set (presumably it
    fits on the training data on every call -- confirm in ``uclf``).

    For each of the three populations (overall, white, black) the validation
    and test sets are scored, then ``balance_accuracy`` picks a threshold on
    the validation scores and reports balanced accuracy for both sets. The
    two fairness values come from ``fair.get_EOD`` and ``fair.get_SP`` on the
    white/black test scores with each group's own threshold.

    Args:
        classifier: attribute name on ``uclf`` selecting the scoring function.
        records: list that receives one dict of metrics (mutated in place).
        X_train_scaled, y_train: training data passed to every scoring call.
        X_*_scaled, y_*: validation / test data for the overall, white and
            black populations.

    Returns:
        None. The result is the dict appended to ``records``.
    """
    score_fn = getattr(uclf, classifier)

    def _score_and_threshold(X_val_part, y_val_part, X_test_part, y_test_part):
        # Validation first, then test, then the threshold search: this is the
        # order in which the underlying helpers run and print.
        val_scores = score_fn(X_train_scaled, y_train, X_val_part, y_val_part)
        test_scores = score_fn(X_train_scaled, y_train, X_test_part, y_test_part)
        cutoff, val_ba, test_ba = balance_accuracy (y_val_part, val_scores, y_test_part, test_scores)
        return val_scores, test_scores, cutoff, val_ba, test_ba

    # Whole population.
    y_val_score, _, threshold, ba_val, ba_test = _score_and_threshold(
        X_val_scaled, y_val, X_test_scaled, y_test)
    # AUROC is computed on the validation scores of the whole population.
    auroc = roc_auc_score(y_val, y_val_score)

    # White subgroup.
    _, y_test_score_white, threshold_white, ba_val_white, ba_test_white = _score_and_threshold(
        X_val_white_scaled, y_val_white, X_test_white_scaled, y_test_white)

    # Black subgroup.
    _, y_test_score_black, threshold_black, ba_val_black, ba_test_black = _score_and_threshold(
        X_val_black_scaled, y_val_black, X_test_black_scaled, y_test_black)

    # Both fairness helpers receive the same six arguments.
    group_args = (y_test_white, y_test_score_white, threshold_white,
                  y_test_black, y_test_score_black, threshold_black)
    eod = fair.get_EOD(*group_args)
    sp = fair.get_SP(*group_args)

    row = {
        'auroc': auroc,
        'overall threshold': threshold,
        'overall ba validation': ba_val,
        'overall ba test': ba_test,
        'white threshold': threshold_white,
        'white ba validation': ba_val_white,
        'white ba test': ba_test_white,
        'black threshold': threshold_black,
        'black ba validation': ba_val_black,
        'black ba test': ba_test_black,
        'eod': eod,
        # The value returned by fair.get_SP is stored under the key 'di'.
        'di': sp,
    }
    records.append(row)

In [5]:
def balance_accuracy (y_val, y_val_score,y_test, y_test_score):
    """Pick a threshold on the validation set and score both sets with it.

    The threshold is the first element returned by
    ``thres.get_optimal_threshold_Jvalue`` for the validation labels and
    scores. ``thres.calculate_balanced_accuracy`` is then called with that
    threshold for the validation data and for the test data. Every value is
    printed as it is computed.

    Returns:
        Tuple ``(threshold, ba_val, ba_test)``.
    """
    best_cutoff, _ignored = thres.get_optimal_threshold_Jvalue (y_val, y_val_score)
    print ("Optimal threshold by J value is ",best_cutoff)

    # Validation first, then test; each is printed right after it is computed.
    splits = (
        ("Balanced accuracy score of val is ", y_val, y_val_score),
        ("Balanced accuracy score of test is ", y_test, y_test_score),
    )
    accuracies = []
    for message, labels, scores in splits:
        accuracy = thres.calculate_balanced_accuracy(labels, scores, best_cutoff)
        print (message, accuracy)
        accuracies.append(accuracy)

    return best_cutoff, accuracies[0], accuracies[1]

In [6]:
def fairness_metrics (X, y, attribute, random_state):
    """Run one split of the experiment for all four classifiers.

    ``fair.split_by_trait`` supplies train / validation / test sets plus the
    white and black subsets of the validation and test sets. Row counts of
    every part are printed. A ``MaxAbsScaler`` is fitted on the training
    features only and then applied to every other feature set. Finally
    ``get_result`` is called once per classifier.

    Side effects: one row is appended to each of the module-level lists
    ``records_lr``, ``records_rf``, ``records_dt`` and ``records_gbt``; these
    must already exist when this function is called.

    Args:
        X: feature table handed to ``fair.split_by_trait``.
        y: labels handed to ``fair.split_by_trait``.
        attribute: name of the attribute to split on.
        random_state: seed forwarded to ``fair.split_by_trait``.
    """
    (X_train, y_train, X_val, y_val, X_test, y_test,
     X_val_white, X_val_black, y_val_white, y_val_black,
     X_test_white, X_test_black, y_test_white, y_test_black) = fair.split_by_trait(X, y, attribute, random_state)

    print("X train", X_train.shape[0])
    print("Y train", y_train.shape[0])
    # One line per set: overall, white and black row counts.
    size_report = (
        (X_val, X_val_white, X_val_black),
        (y_val, y_val_white, y_val_black),
        (X_test, X_test_white, X_test_black),
        (y_test, y_test_white, y_test_black),
    )
    for overall, white, black in size_report:
        print(overall.shape[0], white.shape[0], black.shape[0])

    # Fit the scaler on the training features only, then reuse it everywhere.
    scaler = preprocessing.MaxAbsScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    (X_test_scaled, X_test_white_scaled, X_test_black_scaled,
     X_val_scaled, X_val_white_scaled, X_val_black_scaled) = [
        scaler.transform(part)
        for part in (X_test, X_test_white, X_test_black, X_val, X_val_white, X_val_black)
    ]

    # Positional arguments shared by every get_result call, in its parameter order.
    shared_args = (
        X_train_scaled, y_train,
        X_val_scaled, y_val, X_test_scaled, y_test,
        X_val_white_scaled, y_val_white, X_test_white_scaled, y_test_white,
        X_val_black_scaled, y_val_black, X_test_black_scaled, y_test_black,
    )
    get_result ("logic_regression", records_lr, *shared_args)
    get_result ("random_forest", records_rf, *shared_args)
    get_result ("decision_tree", records_dt, *shared_args)
    get_result ("gradiant_boosting", records_gbt, *shared_args)

In [None]:
# One list of per-split result rows for each classifier; fairness_metrics
# appends to these module-level lists.
records_lr, records_rf, records_dt, records_gbt = [], [], [], []

# Repeat the experiment for seeds 0 through 15, splitting on "Race_W".
for random_state in range(16):
    fairness_metrics (X, y, "Race_W", random_state)

# One table per classifier, one row per seed.
result_lr = pd.DataFrame(data=records_lr)
result_rf = pd.DataFrame(data=records_rf)
result_dt = pd.DataFrame(data=records_dt)
result_gbt = pd.DataFrame(data=records_gbt)


X train 65694
Y train 65694
21898 18899 2999
21898 18899 2999
21898 18968 2930
21898 18968 2930


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25882185696726207
0.2619870551548744
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19904
           1       0.43      0.03      0.06      1994

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19822    82]
 [ 1931    63]]
done in 2.171373s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25882185696726207
0.2623041485961046
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934
           1       0.43      0.03      0.06      1964

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19847    87]
 [ 1899    65]]
done in 2.584278s
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.41300000000000003
threshold:0.2, J-value:0.26499999999999996
threshold:0.30000000000000004, J-value:0.152
threshold:0.4, J-value:0.07100000000000001
threshold:0.5, J-value:0.028
threshold:0.6000000000000001, J-value:0.012
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7064876804851854
Balanced accuracy score of test is  0.6998057183409937


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25882185696726207
0.26282254893490486
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17180
           1       0.45      0.03      0.06      1719

    accuracy                           0.91     18899
   macro avg       0.68      0.51      0.51     18899
weighted avg       0.87      0.91      0.87     18899

Confusion_matrix
[[17114    66]
 [ 1665    54]]
done in 2.364846s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25882185696726207
0.26278189359925863
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17260
           1       0.42      0.03      0.06      1708

    accuracy                           0.91     18968
   macro avg       0.67      0.51      0.51     18968
weighted avg       0.87      0.91      0.87     18968

Confusion_matrix
[[17186    74]
 [ 1654    54]]
done in 2.242202s
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.40700000000000003
threshold:0.2, J-value:0.259
threshold:0.30000000000000004, J-value:0.152
threshold:0.4, J-value:0.069
threshold:0.5, J-value:0.027
threshold:0.6000000000000001, J-value:0.011
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7032689667829457
Balanced accuracy score of test is  0.7000665534150518


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25882185696726207
0.25672196780949413
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      2724
           1       0.36      0.03      0.06       275

    accuracy                           0.91      2999
   macro avg       0.64      0.51      0.51      2999
weighted avg       0.86      0.91      0.87      2999

Confusion_matrix
[[2708   16]
 [ 266    9]]
done in 2.646432s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25882185696726207
0.259211361149066
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95      2674
           1       0.46      0.04      0.08       256

    accuracy                           0.91      2930
   macro avg       0.69      0.52      0.52      2930
weighted avg       0.88      0.91      0.88      2930

Confusion_matrix
[[2661   13]
 [ 245   11]]
done in 2.591161s
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.45300000000000007
threshold:0.2, J-value:0.303
threshold:0.30000000000000004, J-value:0.156
threshold:0.4, J-value:0.082
threshold:0.5, J-value:0.027000000000000003
threshold:0.6000000000000001, J-value:0.013999999999999999
threshold:0.7000000000000001, J-value:-0.001
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7267127219329863
Balanced accuracy score of test is  0.6977535410433807
True positive rate of class 1 

In [None]:
def add_mean_sd(records, result_table):
    """Return the per-run records followed by three summary rows.

    The summary rows are computed from the numeric columns that are actually
    present in ``result_table`` (for this notebook: 'auroc', the
    overall/white/black threshold and balanced-accuracy columns, 'eod' and
    'di'). Earlier code looked up hard-coded names such as "threshold_male"
    or "sp" that the result rows never contain, which raised ``KeyError``.

    Rows appended after the original records, in order:
        1. mean of every numeric column,
        2. standard deviation (pandas default, ddof=1) of every numeric column,
        3. a condensed mean row that leaves out validation, precision, recall
           and tnr columns.

    Args:
        records: list of per-run metric dicts. It is NOT modified, so running
            the calling cell again does not pile up duplicate summary rows.
        result_table: DataFrame built from ``records``; source of the
            statistics.

    Returns:
        A new DataFrame holding the records plus the summary rows. If
        ``result_table`` has no numeric data, only the records are returned.
    """
    numeric = result_table.select_dtypes(include="number")
    if numeric.empty:
        # Nothing to summarise; avoid emitting three all-NaN rows.
        return pd.DataFrame(list(records))

    mean_row = numeric.mean().to_dict()
    std_row = numeric.std().to_dict()

    # The condensed row repeats the means but leaves out these columns.
    condensed_skip = ("validation", "precision", "recall", "tnr")
    condensed_row = {
        column: value
        for column, value in mean_row.items()
        if not any(token in column for token in condensed_skip)
    }

    return pd.DataFrame(list(records) + [mean_row, std_row, condensed_row])


In [None]:
# Output folder for the per-classifier CSV files.
# NOTE(review): absolute, machine-specific path; the folder must already exist.
result_path = '/Users/lifuchen/Desktop/research/results/'

# Replace each per-run table with the table that also carries the summary rows.
result_lr = add_mean_sd(records_lr, result_lr)
result_rf = add_mean_sd(records_rf, result_rf)
result_dt = add_mean_sd(records_dt, result_dt)
result_gbt = add_mean_sd(records_gbt, result_gbt)

# Write one file per classifier, without the index column.
result_lr.to_csv(path_or_buf=path.join(result_path, 'race-lr-result.csv'), index=False)
result_rf.to_csv(path_or_buf=path.join(result_path, 'race-rf-result.csv'), index=False)
result_dt.to_csv(path_or_buf=path.join(result_path, 'race-dt-result.csv'), index=False)
result_gbt.to_csv(path_or_buf=path.join(result_path, 'race-gbt-result.csv'), index=False)

In [None]:
def print_result(result_table):
    """Print ``mean (+/- std)`` to four decimals for eight result columns.

    The columns, in print order, are the overall / white / black balanced
    accuracy on validation and test, followed by 'eod' and 'di'. The standard
    deviation is the pandas default for ``Series.std``.

    Args:
        result_table: DataFrame containing the eight columns listed below.

    Returns:
        None; the report goes to standard output.
    """
    report_lines = (
        ('overall ba validation: %.4f (+/- %.4f)', "overall ba validation"),
        ('overall ba test: %.4f (+/- %.4f)', "overall ba test"),
        ('white ba validation: %.4f (+/- %.4f)', "white ba validation"),
        ('white ba test: %.4f (+/- %.4f)', "white ba test"),
        ('black ba validation: %.4f (+/- %.4f)', "black ba validation"),
        ('black ba test: %.4f (+/- %.4f)', "black ba test"),
        ('eod: %.4f (+/- %.4f)', "eod"),
        ('di: %.4f (+/- %.4f)', "di"),
    )
    for template, column in report_lines:
        average = result_table[column].mean()
        spread = result_table[column].std()
        print (template % (average, spread))