In [1]:
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.getcwd())))
import warnings
warnings.filterwarnings('ignore')

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from lightgbm import LGBMClassifier

from other_paper.balancers  import MulticlassBalancer

from fmclp.cuae_metric import cuae
from fmclp.get_data import get_data

### COMPAS 

In [2]:
compas = get_data('compas')
compas

Unnamed: 0,ScaleSet_ID,RecSupervisionLevel,Scale_ID,attr,target,Agency_Text_DRRD,Agency_Text_PRETRIAL,Agency_Text_Probation,Sex_Code_Text_Male,ScaleSet_Risk and Prescreen,...,CustodyStatus_Prison Inmate,CustodyStatus_Probation,CustodyStatus_Residential Program,MaritalStatus_Married,MaritalStatus_Separated,MaritalStatus_Significant Other,MaritalStatus_Single,MaritalStatus_Unknown,MaritalStatus_Widowed,AssessmentType_New
0,22,1,7,0,0,False,True,False,True,True,...,False,False,False,False,False,False,True,False,False,True
1,22,1,8,0,0,False,True,False,True,True,...,False,False,False,False,False,False,True,False,False,True
2,22,1,18,0,0,False,True,False,True,True,...,False,False,False,False,False,False,True,False,False,True
3,22,1,7,0,0,False,True,False,True,True,...,False,False,False,True,False,False,False,False,False,True
4,22,1,8,0,0,False,True,False,True,True,...,False,False,False,True,False,False,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60838,22,3,8,1,2,False,False,True,True,True,...,False,False,False,False,False,False,True,False,False,True
60839,22,3,18,1,2,False,False,True,True,True,...,False,False,False,False,False,False,True,False,False,True
60840,22,1,7,0,0,False,True,False,True,True,...,False,False,False,False,False,False,True,False,False,True
60841,22,1,8,0,1,False,True,False,True,True,...,False,False,False,False,False,False,True,False,False,True


In [6]:
accuracy_list = []
diff_list = []

for i in range(10): 
    y = compas.drop('target', axis=1)
    x = compas['target']
    y_train, y_test, x_train, x_test = train_test_split(y, x, test_size=0.3)
    estimator = LGBMClassifier(verbose=-1)
    estimator.fit(y_train, x_train)
    estimator_pred = estimator.predict(y_test)

    a = y_train['attr'].values
    y = x_train.values
    y_ = np.array(estimator.predict(y_train))

    compas_balancer = MulticlassBalancer(y=y, y_=y_, a=a)
    compas_balancer.adjust(goal='odds') 
    compas_preds = compas_balancer.predict(y_=np.array(estimator.predict(y_test)), a=y_test['attr'].values)

    accuracy = accuracy_score(compas_preds, x_test)
    metric = cuae(y_true=x_test, y_pred=compas_preds, sensitive_features=y_test['attr'].values)['diff']

    accuracy_list.append(accuracy)
    diff_list.append(metric)
    print(i+1)
    
accuracy_list = np.array(accuracy_list)
diff_list = np.array(diff_list)

1
2
3
4
5
6
7
8
9
10


In [7]:
with open("../../results/comparison_results/compas.txt", 'w') as f:
    f.write(f"""accuracy: {accuracy_list}
diff: {diff_list}
accuracy_mean: {accuracy_list.mean()}
diff_mean: {diff_list.mean()}""")

In [7]:
print(f"""average_accuracy: {accuracy_list.mean()}
average_diff: {diff_list.mean()}""")

average_accuracy: 0.7332438503259737
average_diff: 0.061553376460736595
