In [43]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from os import path
from sklearn.metrics import balanced_accuracy_score, roc_auc_score
import sklearn.preprocessing
from sklearn import preprocessing
from sklearn.preprocessing import Normalizer
import src.lib.utility_classfier as uclf
import src.lib.optimal_threhold_related as thres
import src.lib.fairness_tests as fair

In [44]:
# NOTE(review): absolute, user-specific location; the notebook only runs
# unchanged on a machine that has this exact directory layout.
data_path='/Users/lifuchen/Desktop/research/data.csv'
# Read the whole CSV into `df`; the next cell splits it into labels and features.
df = pd.read_csv(data_path)

In [45]:
# Labels: the raw values of the `Class` column.
y = df['Class'].values
# Features: every column except `GRID` and the label itself.
# NOTE(review): `GRID` is presumably a record identifier - confirm.
X = df.drop(columns=['GRID', 'Class'])
# Displayed as the cell output (rows, feature columns).
X.shape

(109490, 87)

In [46]:
def save_prediction(
    classifier, characteristic,
    X_train_scaled, y_train,
    X_val_scaled, y_val,
    X_test_scaled, y_test,
    X_val_male_scaled, y_val_male,
    X_test_male_scaled, y_test_male,
    X_val_female_scaled, y_val_female,
    X_test_female_scaled, y_test_female,
    model_dir='/Users/lifuchen/Desktop/Evaluating-and-Mitigating-Bias-in-ML-Models-for-CVD/Models/',
    result_dir='/Users/lifuchen/Desktop/research/predictions/',
):
    """Train one classifier and write its scores for every split to a CSV file.

    Parameters
    ----------
    classifier : str
        Name of the training function to look up on ``uclf``
        (``getattr(uclf, classifier)``).
    characteristic : str
        Run tag. The full string is embedded in the predictions CSV name;
        only its last character is embedded in the model file name.
    X_train_scaled, y_train, X_test_scaled, y_test
        Passed straight to the ``uclf`` training function.
    X_val_scaled, X_val_male_scaled, X_test_male_scaled,
    X_val_female_scaled, X_test_female_scaled
        Feature sets that are scored with the trained classifier. The
        "male"/"female" names denote subgroup 1 and subgroup 2; the caller in
        this notebook passes its white/black subsets here.
    y_val, y_val_male, y_test_male, y_val_female, y_test_female
        Accepted for signature symmetry with ``get_result``; not used here.
    model_dir : str, optional
        Directory joined with the model file name and forwarded as
        ``file_name``. Defaults to the original hard-coded location.
    result_dir : str, optional
        Directory the predictions CSV is written to. Defaults to the original
        hard-coded location.

    Returns
    -------
    None
        The only result is the CSV ``<classifier><characteristic>prediction.csv``
        with columns ``val_score``, ``test_score``, ``val_1_score``,
        ``test_1_score``, ``val_2_score`` and ``test_2_score``.
    """
    train_classifier = getattr(uclf, classifier)

    # NOTE(review): only the last character of `characteristic` ends up in the
    # model file name, so tags sharing a final character share a name. The model
    # is requested with dump_model=False, so this presumably has no effect on
    # disk - confirm against the uclf implementation.
    model_filename = str(classifier) + '_' + characteristic[-1] + '_model.sav'
    clf = train_classifier(
        X_train_scaled, y_train, X_test_scaled, y_test,
        dump_model=False, file_name=path.join(model_dir, model_filename),
    )

    # Column name -> features to score, in the column order of the output CSV.
    feature_sets = (
        ('val_score', X_val_scaled),
        ('test_score', X_test_scaled),
        ('val_1_score', X_val_male_scaled),
        ('test_1_score', X_test_male_scaled),
        ('val_2_score', X_val_female_scaled),
        ('test_2_score', X_test_female_scaled),
    )
    # Column 1 of predict_proba is kept as the score for each row.
    scores = {
        column: clf.predict_proba(features)[:, 1]
        for column, features in feature_sets
    }

    # The splits have different lengths: build one row per split (shorter rows
    # are padded with NaN) and transpose so that each split becomes a column.
    overall_prediction = pd.DataFrame.from_dict(scores, orient='index').transpose()

    filename = str(classifier) + str(characteristic) + "prediction.csv"
    overall_prediction.to_csv(path.join(result_dir, filename), index=False)

In [47]:
def get_result (classifier, characteristic, records, X_train_scaled, y_train, X_val_scaled, y_val, X_test_scaled, y_test, X_val_white_scaled, y_val_white, X_test_white_scaled, y_test_white, X_val_black_scaled, y_val_black, X_test_black_scaled, y_test_black, result_dir='/Users/lifuchen/Desktop/research/predictions/'):
    """Score one classifier's saved predictions and append one metrics row.

    Reads ``<classifier><characteristic>prediction.csv`` from ``result_dir``,
    picks a threshold per population (overall, group 1 = "white",
    group 2 = "black") via ``balance_accuracy``, and computes AUROC, the
    ``thres.calculate_precision_metrics`` outputs and the ``fair`` metrics.

    Parameters
    ----------
    classifier : str
        Classifier name used as the prefix of the predictions file name.
    characteristic : str
        Run tag used in the predictions file name.
    records : list
        Mutated in place: exactly one dict of metrics is appended.
    y_val, y_test, y_val_white, y_test_white, y_val_black, y_test_black
        True labels for the corresponding score columns in the CSV.
    X_train_scaled, y_train, X_val_scaled, X_test_scaled, X_val_white_scaled,
    X_test_white_scaled, X_val_black_scaled, X_test_black_scaled
        Accepted for signature symmetry with ``save_prediction``; not used here.
    result_dir : str, optional
        Directory containing the predictions CSV. Defaults to the original
        hard-coded location.

    Returns
    -------
    None
    """
    filename = str(classifier) + characteristic + "prediction.csv"
    prediction = pd.read_csv(path.join(result_dir, filename))

    def stored_scores(column):
        # Drop the missing cells of a column; shorter splits come back from the
        # CSV with NaN in the rows they do not fill.
        values = prediction[column]
        return values[values.notna()]

    y_val_score = stored_scores('val_score')
    y_test_score = stored_scores('test_score')
    y_val_score_white = stored_scores('val_1_score')
    y_test_score_white = stored_scores('test_1_score')
    y_val_score_black = stored_scores('val_2_score')
    y_test_score_black = stored_scores('test_2_score')

    # Overall population: threshold chosen on validation, reported on test.
    threshold, ba_val, ba_test = balance_accuracy(y_val, y_val_score, y_test, y_test_score)
    auroc = roc_auc_score(y_test, y_test_score)
    precision, recall, tpr, tnr, pd_overall = thres.calculate_precision_metrics(y_test, y_test_score, threshold)

    # Each subgroup gets its own threshold from its own validation subset.
    threshold_white, ba_val_white, ba_test_white = balance_accuracy(y_val_white, y_val_score_white, y_test_white, y_test_score_white)
    precision_white, recall_white, tpr_white, tnr_white, pd_white = thres.calculate_precision_metrics(y_test_white, y_test_score_white, threshold_white)

    threshold_black, ba_val_black, ba_test_black = balance_accuracy(y_val_black, y_val_score_black, y_test_black, y_test_score_black)
    precision_black, recall_black, tpr_black, tnr_black, pd_black = thres.calculate_precision_metrics(y_test_black, y_test_score_black, threshold_black)

    # Between-group fairness metrics on the test subsets, each group evaluated
    # at its own threshold.
    eod = fair.get_EOD(y_test_white, y_test_score_white, threshold_white, y_test_black, y_test_score_black, threshold_black)
    sp = fair.get_SP(y_test_white, y_test_score_white, threshold_white, y_test_black, y_test_score_black, threshold_black)

    records.append({
        'auroc': auroc,
        'overall threshold': threshold,
        'white threshold': threshold_white,
        'black threshold': threshold_black,
        'overall ba validation': ba_val,
        'overall ba test': ba_test,
        'white ba validation': ba_val_white,
        'white ba test': ba_test_white,
        'black ba validation': ba_val_black,
        'black ba test': ba_test_black,
        'overall precision': precision,
        'overall recall': recall,
        'overall tpr': tpr,
        'overall tnr': tnr,
        'overall pd': pd_overall,
        'white precision': precision_white,
        'white recall': recall_white,
        'white tpr': tpr_white,
        'white tnr': tnr_white,
        'white pd': pd_white,
        'black precision': precision_black,
        'black recall': recall_black,
        'black tpr': tpr_black,
        'black tnr': tnr_black,
        'black pd': pd_black,
        'eod': eod,
        # NOTE(review): stored under 'di' but produced by fair.get_SP - confirm
        # which metric name is intended. The key is kept because add_mean_sd
        # reads it.
        'di': sp,
        })

In [48]:
def balance_accuracy(y_val, y_val_score, y_test, y_test_score):
    """Choose a threshold on the validation split and report balanced accuracy.

    The threshold is the first element returned by
    ``thres.get_optimal_threshold_Jvalue(y_val, y_val_score)``. That same
    threshold is then passed to ``thres.calculate_balanced_accuracy`` for the
    validation data and for the test data. All three values are printed.

    Returns
    -------
    tuple
        ``(threshold, validation balanced accuracy, test balanced accuracy)``.
    """
    # The second element of the pair is not needed here.
    cutoff, _unused = thres.get_optimal_threshold_Jvalue(y_val, y_val_score)
    print("Optimal threshold by J value is ", cutoff)

    val_ba = thres.calculate_balanced_accuracy(y_val, y_val_score, cutoff)
    print("Balanced accuracy score of val is ", val_ba)

    # The test split is evaluated at the validation-derived cutoff.
    test_ba = thres.calculate_balanced_accuracy(y_test, y_test_score, cutoff)
    print("Balanced accuracy score of test is ", test_ba)

    return cutoff, val_ba, test_ba

In [49]:
def fairness_metrics (X, y, attribute, random_state):
    """Run one split/scale/train/evaluate round for a single random seed.

    Steps, in order:
      1. ``fair.split_by_trait_no_protected_trait`` produces train, validation
         and test data plus two subgroup subsets (called white/black here) of
         the validation and test data.
      2. The row count of every piece is printed.
      3. A ``MaxAbsScaler`` is fitted on the training features only and then
         applied to every other feature set.
      4. ``save_prediction`` is called for each enabled classifier, then
         ``get_result`` is called for each of them.

    Results are appended to the module-level lists ``records_lr``,
    ``records_rf`` and ``records_gbt``, which must exist before the call.
    Nothing is returned.
    """
    (X_train, y_train, X_val, y_val, X_test, y_test,
     X_val_white, X_val_black, y_val_white, y_val_black,
     X_test_white, X_test_black, y_test_white, y_test_black) = \
        fair.split_by_trait_no_protected_trait(X, y, attribute, random_state)

    print("X train", X_train.shape[0])
    print("Y train", y_train.shape[0])
    print(X_val.shape[0], X_val_white.shape[0], X_val_black.shape[0])
    print(y_val.shape[0], y_val_white.shape[0], y_val_black.shape[0])
    print(X_test.shape[0], X_test_white.shape[0], X_test_black.shape[0])
    print(y_test.shape[0], y_test_white.shape[0], y_test_black.shape[0])

    # Fit on train only; every other split reuses the fitted scaling.
    scaler = preprocessing.MaxAbsScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    X_test_white_scaled = scaler.transform(X_test_white)
    X_test_black_scaled = scaler.transform(X_test_black)
    X_val_scaled = scaler.transform(X_val)
    X_val_white_scaled = scaler.transform(X_val_white)
    X_val_black_scaled = scaler.transform(X_val_black)

    characteristic = attribute + '_included_' + str(random_state)

    # Positional payload shared by save_prediction and get_result:
    # train, overall val/test, then subgroup 1 val/test, then subgroup 2 val/test.
    split_args = (
        X_train_scaled, y_train,
        X_val_scaled, y_val,
        X_test_scaled, y_test,
        X_val_white_scaled, y_val_white,
        X_test_white_scaled, y_test_white,
        X_val_black_scaled, y_val_black,
        X_test_black_scaled, y_test_black,
    )

    # "decision_tree" is intentionally not part of the run.
    for model_name in ("logic_regression", "random_forest", "gradiant_boosting"):
        save_prediction(model_name, characteristic, *split_args)

    # Evaluation happens only after every classifier has written its CSV.
    get_result("logic_regression", characteristic, records_lr, *split_args)
    get_result("random_forest", characteristic, records_rf, *split_args)
    get_result("gradiant_boosting", characteristic, records_gbt, *split_args)

In [50]:
# One fresh record list per enabled classifier. fairness_metrics reads these
# module-level names, so they must be bound before the loop starts.
# (The decision-tree run is disabled, so it has no list.)
records_lr, records_rf, records_gbt = [], [], []

# Ten repetitions, one per random seed 0..9, on the "Race_W" attribute.
for random_state in range(10):
    fairness_metrics(X, y, "Race_W", random_state)

# One row per seed for each classifier.
result_lr, result_rf, result_gbt = (
    pd.DataFrame(rows) for rows in (records_lr, records_rf, records_gbt)
)


X train 65694
Y train 65694
21898 18899 2999
21898 18899 2999
21898 18968 2930
21898 18968 2930




0.25884953726474685
0.2621818978817408
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934
           1       0.43      0.03      0.06      1964

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19851    83]
 [ 1902    62]]
done in 11.378848s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934
           1       0.00      0.00      0.00      1964

    accuracy                           0.91     21898
   macro avg       0.46      0.50      0.48     21898
weighted avg       0.83      0.91      0.87     21898

Confusion_matrix
[[19934     0]
 [ 1964     0]]
done in 6.309473s


  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934
           1       0.47      0.02      0.03      1964

    accuracy                           0.91     21898
   macro avg       0.69      0.51      0.49     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19895    39]
 [ 1929    35]]
done in 76.239591s
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.41100000000000003
threshold:0.2, J-value:0.26599999999999996
threshold:0.30000000000000004, J-value:0.151
threshold:0.4, J-value:0.07
threshold:0.5, J-value:0.029
threshold:0.6000000000000001, J-value:0.012
threshold:0.7000000000000001, J-value:0.001
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7051320762931883
Balanced accuracy score of test is  0.6988199806816671
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.404
threshold:0.2, J-value:0.



0.26212332005104655
0.2578680186569327
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19972
           1       0.42      0.03      0.06      1926

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19887    85]
 [ 1864    62]]
done in 10.083406s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19972
           1       0.00      0.00      0.00      1926

    accuracy                           0.91     21898
   macro avg       0.46      0.50      0.48     21898
weighted avg       0.83      0.91      0.87     21898

Confusion_matrix
[[19972     0]
 [ 1926     0]]
done in 5.807057s


  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19972
           1       0.45      0.02      0.03      1926

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.49     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19936    36]
 [ 1896    30]]
done in 76.585532s
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.40700000000000003
threshold:0.2, J-value:0.27299999999999996
threshold:0.30000000000000004, J-value:0.152
threshold:0.4, J-value:0.069
threshold:0.5, J-value:0.033
threshold:0.6000000000000001, J-value:0.013000000000000001
threshold:0.7000000000000001, J-value:0.004
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7039137155925477
Balanced accuracy score of test is  0.6974566053950089
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.406
thresho



0.25945802273225005
0.26226504777522075
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
           1       0.53      0.03      0.06      2015

    accuracy                           0.91     21898
   macro avg       0.72      0.51      0.50     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19831    52]
 [ 1956    59]]
done in 10.015808s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
           1       0.00      0.00      0.00      2015

    accuracy                           0.91     21898
   macro avg       0.45      0.50      0.48     21898
weighted avg       0.82      0.91      0.86     21898

Confusion_matrix
[[19883     0]
 [ 2015     0]]
done in 5.724017s


  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
           1       0.53      0.01      0.02      2015

    accuracy                           0.91     21898
   macro avg       0.72      0.51      0.49     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19861    22]
 [ 1990    25]]
done in 83.942022s
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.394
threshold:0.2, J-value:0.257
threshold:0.30000000000000004, J-value:0.144
threshold:0.4, J-value:0.07
threshold:0.5, J-value:0.028
threshold:0.6000000000000001, J-value:0.009000000000000001
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6970409596887497
Balanced accuracy score of test is  0.7063782931638921
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.384
threshold:0.2, J-value:0.249
thresho



0.2588705224972624
0.2608286573776988
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19918
           1       0.45      0.03      0.06      1980

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19841    77]
 [ 1918    62]]
done in 10.379616s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19918
           1       0.00      0.00      0.00      1980

    accuracy                           0.91     21898
   macro avg       0.45      0.50      0.48     21898
weighted avg       0.83      0.91      0.87     21898

Confusion_matrix
[[19918     0]
 [ 1980     0]]
done in 5.874747s


  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19918
           1       0.43      0.01      0.02      1980

    accuracy                           0.91     21898
   macro avg       0.67      0.50      0.49     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19892    26]
 [ 1960    20]]
done in 79.814765s
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.395
threshold:0.2, J-value:0.26
threshold:0.30000000000000004, J-value:0.153
threshold:0.4, J-value:0.075
threshold:0.5, J-value:0.033
threshold:0.6000000000000001, J-value:0.013000000000000001
threshold:0.7000000000000001, J-value:0.003
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6974371556395601
Balanced accuracy score of test is  0.7093500016735281
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.394
threshold:0.2, J-value:0.259
thresho



0.26307527616800563
0.2563454323731131
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95     19975
           1       0.49      0.04      0.08      1923

    accuracy                           0.91     21898
   macro avg       0.70      0.52      0.51     21898
weighted avg       0.88      0.91      0.88     21898

Confusion_matrix
[[19892    83]
 [ 1844    79]]
done in 13.039648s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19975
           1       0.00      0.00      0.00      1923

    accuracy                           0.91     21898
   macro avg       0.46      0.50      0.48     21898
weighted avg       0.83      0.91      0.87     21898

Confusion_matrix
[[19975     0]
 [ 1923     0]]
done in 6.444504s


  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19975
           1       0.49      0.02      0.03      1923

    accuracy                           0.91     21898
   macro avg       0.70      0.51      0.49     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19941    34]
 [ 1890    33]]
done in 88.164378s
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.402
threshold:0.2, J-value:0.264
threshold:0.30000000000000004, J-value:0.147
threshold:0.4, J-value:0.067
threshold:0.5, J-value:0.028
threshold:0.6000000000000001, J-value:0.01
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7010349062300338
Balanced accuracy score of test is  0.7047000247969868
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.397
threshold:0.2, J-value:0.262
threshold:0.3000000000



0.2586009846255978
0.2607431155128406
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19927
           1       0.47      0.03      0.06      1971

    accuracy                           0.91     21898
   macro avg       0.69      0.51      0.50     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19858    69]
 [ 1911    60]]
done in 11.364938s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19927
           1       0.00      0.00      0.00      1971

    accuracy                           0.91     21898
   macro avg       0.45      0.50      0.48     21898
weighted avg       0.83      0.91      0.87     21898

Confusion_matrix
[[19927     0]
 [ 1971     0]]
done in 6.703871s


  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19927
           1       0.58      0.01      0.03      1971

    accuracy                           0.91     21898
   macro avg       0.75      0.51      0.49     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19906    21]
 [ 1942    29]]
done in 79.831913s
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.391
threshold:0.2, J-value:0.252
threshold:0.30000000000000004, J-value:0.147
threshold:0.4, J-value:0.07100000000000001
threshold:0.5, J-value:0.026
threshold:0.6000000000000001, J-value:0.012
threshold:0.7000000000000001, J-value:0.006
threshold:0.8, J-value:0.002
threshold:0.9, J-value:0.001
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.695381286179968
Balanced accuracy score of test is  0.7067297156691941
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.391
threshold:0.2, J-value:0.251
thresh



0.2615435204528365
0.25821949119696097
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19955
           1       0.49      0.04      0.07      1943

    accuracy                           0.91     21898
   macro avg       0.70      0.52      0.51     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19881    74]
 [ 1873    70]]
done in 9.741886s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19955
           1       0.00      0.00      0.00      1943

    accuracy                           0.91     21898
   macro avg       0.46      0.50      0.48     21898
weighted avg       0.83      0.91      0.87     21898

Confusion_matrix
[[19955     0]
 [ 1943     0]]
done in 6.120762s


  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19955
           1       0.51      0.01      0.03      1943

    accuracy                           0.91     21898
   macro avg       0.71      0.51      0.49     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19927    28]
 [ 1914    29]]
done in 79.188064s
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.41500000000000004
threshold:0.2, J-value:0.26699999999999996
threshold:0.30000000000000004, J-value:0.147
threshold:0.4, J-value:0.07
threshold:0.5, J-value:0.029
threshold:0.6000000000000001, J-value:0.011
threshold:0.7000000000000001, J-value:0.001
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7072861330045519
Balanced accuracy score of test is  0.7038611709078313
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.41700000000000004
threshold



0.25822992471750866
0.2621811542306066
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19907
           1       0.50      0.03      0.06      1991

    accuracy                           0.91     21898
   macro avg       0.71      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19840    67]
 [ 1923    68]]
done in 9.779485s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19907
           1       0.00      0.00      0.00      1991

    accuracy                           0.91     21898
   macro avg       0.45      0.50      0.48     21898
weighted avg       0.83      0.91      0.87     21898

Confusion_matrix
[[19907     0]
 [ 1991     0]]
done in 5.705281s


  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19907
           1       0.40      0.01      0.02      1991

    accuracy                           0.91     21898
   macro avg       0.65      0.50      0.49     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19878    29]
 [ 1972    19]]
done in 78.287409s
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.40800000000000003
threshold:0.2, J-value:0.26799999999999996
threshold:0.30000000000000004, J-value:0.144
threshold:0.4, J-value:0.069
threshold:0.5, J-value:0.028
threshold:0.6000000000000001, J-value:0.006999999999999999
threshold:0.7000000000000001, J-value:0.004
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7043435371757607
Balanced accuracy score of test is  0.6968169441443647
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.4120000000000



0.25968601816768644
0.25890719330005973
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19956
           1       0.49      0.04      0.07      1942

    accuracy                           0.91     21898
   macro avg       0.70      0.52      0.51     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19883    73]
 [ 1871    71]]
done in 11.953714s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19956
           1       0.00      0.00      0.00      1942

    accuracy                           0.91     21898
   macro avg       0.46      0.50      0.48     21898
weighted avg       0.83      0.91      0.87     21898

Confusion_matrix
[[19956     0]
 [ 1942     0]]
done in 6.683143s


  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19956
           1       0.53      0.02      0.03      1942

    accuracy                           0.91     21898
   macro avg       0.72      0.51      0.49     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19927    29]
 [ 1909    33]]
done in 76.375250s
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.391
threshold:0.2, J-value:0.253
threshold:0.30000000000000004, J-value:0.147
threshold:0.4, J-value:0.07100000000000001
threshold:0.5, J-value:0.037000000000000005
threshold:0.6000000000000001, J-value:0.008
threshold:0.7000000000000001, J-value:0.003
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6957983436348029
Balanced accuracy score of test is  0.6988687419222392
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.389
threshold:0.2, J-valu



0.2599848812794757
0.2649508533895226
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19866
           1       0.45      0.03      0.06      2032

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19784    82]
 [ 1966    66]]
done in 9.728797s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19866
           1       0.00      0.00      0.00      2032

    accuracy                           0.91     21898
   macro avg       0.45      0.50      0.48     21898
weighted avg       0.82      0.91      0.86     21898

Confusion_matrix
[[19866     0]
 [ 2032     0]]
done in 6.106532s


  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19866
           1       0.55      0.02      0.03      2032

    accuracy                           0.91     21898
   macro avg       0.73      0.51      0.49     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19841    25]
 [ 2001    31]]
done in 79.536740s
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.40700000000000003
threshold:0.2, J-value:0.26
threshold:0.30000000000000004, J-value:0.153
threshold:0.4, J-value:0.07300000000000001
threshold:0.5, J-value:0.032
threshold:0.6000000000000001, J-value:0.009999999999999998
threshold:0.7000000000000001, J-value:0.004
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7034212550579646
Balanced accuracy score of test is  0.7069820801337465
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.403
threshol

In [51]:
def add_mean_sd(records, result_table, overall_records, type):
    """Append mean and standard-deviation summaries of ``result_table``.

    Side effects:
      * ``records`` receives two extra dicts: the per-column ``.mean()`` of
        ``result_table`` followed by the per-column ``.std()``.
      * ``overall_records`` receives one dict labelled with ``type`` that holds
        a selection of those means and standard deviations.

    Parameters
    ----------
    records : list of dict
        Per-run metric rows; mutated in place.
    result_table : pandas.DataFrame
        Table the statistics are computed from. It must contain every metric
        column listed below.
    overall_records : list of dict
        Cross-classifier summary list; mutated in place.
    type : str
        Label stored under the ``'type'`` key of the summary row.

    Returns
    -------
    tuple
        ``(pandas.DataFrame(records), overall_records)`` where the frame
        includes the two rows appended here.
    """
    populations = ('overall', 'white', 'black')

    # Column order matches the row layout used throughout the notebook.
    metric_columns = ['auroc']
    metric_columns += [group + ' threshold' for group in populations]
    for group in populations:
        metric_columns += [group + ' ba validation', group + ' ba test']
    for group in populations:
        metric_columns += [group + ' ' + metric
                           for metric in ('precision', 'recall', 'tpr', 'tnr', 'pd')]
    metric_columns += ['eod', 'di']

    # (summary key, source column, Series statistic), in output order.
    overall_layout = (
        ('auroc', 'auroc', 'mean'),
        ('auroc_std', 'auroc', 'std'),
        ('overall threshold', 'overall threshold', 'mean'),
        ('white threshold', 'white threshold', 'mean'),
        ('black threshold', 'black threshold', 'mean'),
        ('overall ba test', 'overall ba test', 'mean'),
        ('ba_std', 'overall ba test', 'std'),
        ('white ba test', 'white ba test', 'mean'),
        ('black ba test', 'black ba test', 'mean'),
        ('overall tpr', 'overall tpr', 'mean'),
        ('overall pd', 'overall pd', 'mean'),
        ('white tpr', 'white tpr', 'mean'),
        ('white pd', 'white pd', 'mean'),
        ('black tpr', 'black tpr', 'mean'),
        ('black pd', 'black pd', 'mean'),
        ('eod', 'eod', 'mean'),
        ('eod_std', 'eod', 'std'),
        ('di', 'di', 'mean'),
        ('di_std', 'di', 'std'),
    )

    # Mean row first, then the standard-deviation row.
    records.append({column: result_table[column].mean() for column in metric_columns})
    records.append({column: result_table[column].std() for column in metric_columns})

    summary = {'type': type}
    for key, column, statistic in overall_layout:
        summary[key] = getattr(result_table[column], statistic)()
    overall_records.append(summary)

    return pd.DataFrame(records), overall_records

In [52]:
# Cross-classifier summary rows are collected here. add_mean_sd appends to the
# list it is given and returns that same list, so `overall_records` below is
# an alias of `overall_table`.
overall_table = []
# NOTE(review): each call appends a mean row and a std row to `records_*` in
# place and rebinds `result_*` to a frame that includes them. Re-running this
# cell without re-running the experiment cell would therefore compute the
# statistics over the previous summary rows as well.
result_lr, overall_records = add_mean_sd (records_lr, result_lr, overall_table, 'lr')
result_rf, overall_records = add_mean_sd (records_rf, result_rf, overall_records, 'rf')
#result_dt, overall_records = add_mean_sd (records_dt, result_dt, overall_records, 'dt')
result_gbt, overall_records = add_mean_sd (records_gbt, result_gbt, overall_records, 'gbt')

# Per-classifier tables (per-run rows followed by the mean and std rows).
# NOTE(review): absolute, user-specific output directories.
result_path='/Users/lifuchen/Desktop/research/resample_data/'
result_lr.to_csv(path.join(result_path,'race-lr-result_no_protected.csv'), index=False)
result_rf.to_csv(path.join(result_path,'race-rf-result_no_protected.csv'), index=False)
#result_dt.to_csv(path.join(result_path,'race-dt-result_no_protected.csv'), index=False)
result_gbt.to_csv(path.join(result_path,'race-gbt-result_no_protected.csv'), index=False)

# One summary row per classifier; `result_path` is rebound to a second directory.
overall_result = pd.DataFrame(overall_table)
result_path='/Users/lifuchen/Desktop/research/resample_result/'
overall_result.to_csv(path.join(result_path,'race_no_protected.csv'), index=False)
