In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from ncpol2sdpa import *
from random import shuffle
from tqdm import tqdm

In [3]:
from aif360.datasets import StandardDataset
from aif360.metrics import BinaryLabelDatasetMetric, utils
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import ClassificationMetric
# Odds equalizing post-processing algorithm
from aif360.algorithms.postprocessing.calibrated_eq_odds_postprocessing import CalibratedEqOddsPostprocessing
from aif360.algorithms.postprocessing.eq_odds_postprocessing import EqOddsPostprocessing
from aif360.algorithms.postprocessing.reject_option_classification import RejectOptionClassification

In [5]:
default_mappings = {
    #'label_maps': [{1.0: 'Did recid.', 0.0: 'No recid.'}],
    'protected_attribute_maps': [{0.0: 'Male', 1.0: 'Female'},
                                 {1.0: 'Caucasian', 0.0: 'Not Caucasian'}]
}

class Compas(StandardDataset):

    def __init__(self, df,label_name='is_recid', favorable_classes=[0],
                 protected_attribute_names=['sex', 'race'],
                 privileged_classes=[['Female'], ['Caucasian']],
                 instance_weights_name=None,
                 categorical_features=['age_cat', 'c_charge_degree'],
                 features_to_keep=['sex', 'age', 'age_cat', 'race',
                     'juv_fel_count', 'juv_misd_count', 'juv_other_count',
                     'priors_count', 'c_charge_degree'],
                 features_to_drop=[], na_values=[],
                 custom_preprocessing=[],
                 metadata=default_mappings):
        """See :obj:`StandardDataset` for a description of the arguments.
        Note: The label value 0 in this case is considered favorable (no
        recidivism).
        Examples:
            In some cases, it may be useful to keep track of a mapping from
            `float -> str` for protected attributes and/or labels. If our use
            case differs from the default, we can modify the mapping stored in
            `metadata`:
            >>> label_map = {1.0: 'Did recid.', 0.0: 'No recid.'}
            >>> protected_attribute_maps = [{1.0: 'Male', 0.0: 'Female'}]
            >>> cd = CompasDataset(protected_attribute_names=['sex'],
            ... privileged_classes=[['Male']], metadata={'label_map': label_map,
            ... 'protected_attribute_maps': protected_attribute_maps})
            Now this information will stay attached to the dataset and can be
            used for more descriptive visualizations.
        """
        super(Compas, self).__init__(df=df, label_name=label_name,
            favorable_classes=favorable_classes,
            protected_attribute_names=protected_attribute_names,
            privileged_classes=privileged_classes,
            instance_weights_name=instance_weights_name,
            categorical_features=categorical_features,
            features_to_keep=features_to_keep,
            features_to_drop=features_to_drop, na_values=na_values,
            custom_preprocessing=custom_preprocessing, metadata=metadata)

def split_index(index):
    length = len(index)
    # define the ratios 8:2
    train_len = int(length * 0.8)

    # split the dataframe
    #idx = [*range(length)]
    shuffle(index)  # shuffle the index
    I_train = index[:train_len]
    I_test = index[train_len:length]

    return [I_train,I_test]

def aif_postprocess(dataframe,Itrain,Itest,method):
    Strain,Strain_pred=compas_pred(Itrain)
    Stest,Stest_pred=compas_pred(Itest)
    randseed = 12345679
    if method=='RejectOption':
        cpp=RejectOptionClassification(privileged_groups = privileged_groups,unprivileged_groups = unprivileged_groups)
    elif method=='EqOddsPostprocessing':
        cpp=EqOddsPostprocessing(privileged_groups = privileged_groups,unprivileged_groups = unprivileged_groups)
    else:
    #cost_constraint = "fnr" # "fnr", "fpr", "weighted"
    #else:
        cpp=CalibratedEqOddsPostprocessing(privileged_groups = privileged_groups,unprivileged_groups = unprivileged_groups,
                                             cost_constraint=method)  #,seed=randseed
    cpp = cpp.fit(Strain,Strain_pred)
    return cpp.predict(Stest_pred).scores #labels #[Stest.labels,

def compas_pred(index):
    Sindex=Compas(df=dataframe.loc[index])
    Sindex_pred=Sindex.copy(deepcopy=True)
    Sindex_pred.scores=np.array([[1-i/10] for i in dataframe.loc[index,'decile_score'].tolist()])
    #Sindex_pred.labels = np.where(Sindex_pred.scores >= thresh,Sindex_pred.favorable_label,Sindex_pred.unfavorable_label)
    #y_pred = np.zeros_like(Sindex.labels)
    #y_pred[dataframe.loc[index,'decile_score']>thresh]=Sindex.unfavorable_label
    #Sindex_pred.labels=y_pred
    return [Sindex,Sindex_pred]

def preprocess(filepath,nrows,column_names):
    dataframe = pd.read_csv(filepath, index_col='id',nrows=nrows)
    dataframe=dataframe[(dataframe["race"]=='African-American')|(dataframe["race"]=='Caucasian')]
    dataframe[(dataframe.days_b_screening_arrest <= 30)
                & (dataframe.days_b_screening_arrest >= -30)
                & (dataframe.is_recid != -1)
                & (dataframe.c_charge_degree != 'O')
                & (dataframe.score_text != 'N/A')]
    dataframe=dataframe[column_names] #[features+labels]
    dataframe=dataframe.dropna(axis=0, how='any',thresh=None,subset=None,inplace=False)
    
    dataframe['priors_total_count']=(dataframe['juv_fel_count']+dataframe['juv_misd_count']+dataframe['priors_count'])/3 #+dataframe['juv_other_count']
    
    # base rate: 1-P(is_recid=1|S=s)
    base0=(1-dataframe[(dataframe["race"]=='Caucasian') & (dataframe["is_recid"]==1)].shape[0]/dataframe[dataframe["race"]=='Caucasian'].shape[0])*100
    base1=(1-dataframe[(dataframe["race"]!='Caucasian') & (dataframe["is_recid"]==1)].shape[0]/ dataframe[dataframe["race"]!='Caucasian'].shape[0])*100
    return dataframe,[base0,base1]

def training_process(dataframe,Itrain,method): #Itrain,Itest,method
    Dtrain=dataframe.loc[Itrain]
    I0_train=Dtrain[Dtrain['race']=='Caucasian'].index
    I1_train=Dtrain[Dtrain['race']!='Caucasian'].index
    level = 1
    
    if method=='subgroup-fair':
        # Decision Variables
        a = generate_operators("a", n_vars=2, hermitian=True, commutative=False) # age < 25
        c = generate_operators("c", n_vars=2, hermitian=True, commutative=False) # total number of previous convictions
        d = generate_operators("d", n_vars=2, hermitian=True, commutative=False)
        e = generate_operators("e", n_vars=2, hermitian=True, commutative=False)
        z = generate_operators("z", n_vars=3, hermitian=True, commutative=True)

        # Constraints
        ine1 = [z[0]+Dtrain.loc[i,"is_recid"] - a[0]*int(Dtrain.loc[i,'age']<25) - c[0]*Dtrain.loc[i,'priors_total_count'] - d[0]*Dtrain.loc[i,'decile_score'] + e[0] for i in I0_train]
        ine2 = [z[0]-Dtrain.loc[i,"is_recid"] + a[0]*int(Dtrain.loc[i,'age']<25) + c[0]*Dtrain.loc[i,'priors_total_count'] + d[0]*Dtrain.loc[i,'decile_score'] + e[0] for i in I0_train]
        ine3 = [z[0]+Dtrain.loc[i,"is_recid"] - a[1]*int(Dtrain.loc[i,'age']<25) - c[1]*Dtrain.loc[i,'priors_total_count'] - d[1]*Dtrain.loc[i,'decile_score'] + e[1] for i in I1_train]
        ine4 = [z[0]-Dtrain.loc[i,"is_recid"] + a[1]*int(Dtrain.loc[i,'age']<25) + c[1]*Dtrain.loc[i,'priors_total_count'] + d[1]*Dtrain.loc[i,'decile_score'] + e[1] for i in I1_train]
        max1 =[z[1]-sum((Dtrain.loc[i,"is_recid"]-a[0]*int(Dtrain.loc[i,'age']<25) - c[0]*Dtrain.loc[i,'priors_total_count'] - d[0]*Dtrain.loc[i,'decile_score'] + e[0])**2 for i in I0_train)/len(I0_train)]
        max2 =[z[2]-sum((Dtrain.loc[i,"is_recid"]-a[1]*int(Dtrain.loc[i,'age']<25) - c[1]*Dtrain.loc[i,'priors_total_count'] - d[1]*Dtrain.loc[i,'decile_score'] + e[1])**2 for i in I1_train)/len(I1_train)]
        
        obj_D = z[0] + z[1] + z[2] + 0.5*(z[2]-z[1]) #+ 0.01*(e[0]**2 + e[1]**2)
 
    elif method=='instant-fair':

        # Decision Variables
        a = generate_operators("a", n_vars=2, hermitian=True, commutative=False) # age < 25
        c = generate_operators("c", n_vars=2, hermitian=True, commutative=False) # total number of previous convictions
        d = generate_operators("d", n_vars=2, hermitian=True, commutative=False)
        e = generate_operators("e", n_vars=2, hermitian=True, commutative=False)
        z = generate_operators("z", n_vars=2, hermitian=True, commutative=True)

        # Constraints
        ine1 = [(z[0]+Dtrain.loc[i,"is_recid"] - a[0]*int(Dtrain.loc[i,'age']<25) - c[0]*Dtrain.loc[i,'priors_total_count'] - d[0]*Dtrain.loc[i,'decile_score'] + e[0])/len(I0_train) for i in I0_train]
        ine2 = [(z[0]-Dtrain.loc[i,"is_recid"] + a[0]*int(Dtrain.loc[i,'age']<25) + c[0]*Dtrain.loc[i,'priors_total_count'] + d[0]*Dtrain.loc[i,'decile_score'] + e[0])/len(I0_train) for i in I0_train]
        ine3 = [(z[0]+Dtrain.loc[i,"is_recid"] - a[1]*int(Dtrain.loc[i,'age']<25) - c[1]*Dtrain.loc[i,'priors_total_count'] - d[1]*Dtrain.loc[i,'decile_score'] + e[1])/len(I1_train) for i in I1_train]
        ine4 = [(z[0]-Dtrain.loc[i,"is_recid"] + a[1]*int(Dtrain.loc[i,'age']<25) + c[1]*Dtrain.loc[i,'priors_total_count'] + d[1]*Dtrain.loc[i,'decile_score'] + e[1])/len(I1_train) for i in I1_train]
        max1 = [(z[1]+(Dtrain.loc[i,"is_recid"]-a[0]*int(Dtrain.loc[i,'age']<25) - c[0]*Dtrain.loc[i,'priors_total_count'] - d[0]*Dtrain.loc[i,'decile_score'] + e[0]))/len(I0_train) for i in I0_train]
        max2 = [(z[1]-(Dtrain.loc[i,"is_recid"]-a[1]*int(Dtrain.loc[i,'age']<25) - c[1]*Dtrain.loc[i,'priors_total_count'] - d[1]*Dtrain.loc[i,'decile_score'] + e[1]))/len(I1_train) for i in I1_train]
       
        obj_D = z[0] + z[1] #+ 0.01*(e[0]**2 + e[1]**2)
    
    ine_D=ine1+ine2+ine3+ine4+max1+max2
    sdp_D = SdpRelaxation(variables = flatten([a,c,d,e,z]), verbose = 2)
    sdp_D.get_relaxation(level, objective=obj_D, inequalities=ine_D)
    #sdp_D.solve(solver='mosek')
    sdp_D.solve(solver='sdpa', solverparameters={"executable":"sdpa_gmp","executable": "C:\\Users\\zhouq\\Documents\\sdpa7-windows\\sdpa.exe"})
    print(sdp_D.primal, sdp_D.dual, sdp_D.status)
    
    return [sdp_D[a[0]],sdp_D[a[1]],sdp_D[c[0]],sdp_D[c[1]],sdp_D[d[0]],sdp_D[d[1]],sdp_D[e[0]],sdp_D[e[1]]]

def normalize_score1(arr):
    # Min-Max normalized scores after deleting outliers.
     #Outliers are set to 0 (if too small) or 1 (if too large) directly.
    outlierPosition=detect_outliers(arr,3)
    arr_clean = np.delete(arr,outlierPosition)
    #arr_clean=arr
    arr_min=np.min(arr_clean)
    arr_max=np.max(arr_clean)

    normalized_arr = np.array([round(float(x - arr_min)/(arr_max - arr_min),1) for x in arr])
    #arr_nor = [int(10*float(x - np.mean(arr)/np.std(arr)) ) for x in arr]
    normalized_arr[normalized_arr>10]=1
    normalized_arr[normalized_arr<0]=0
    return normalized_arr

def detect_outliers(data,threshold):
    # return the location of outliers.
    mean_d = np.mean(data)
    std_d = np.std(data)
    outliers = []
    for i in range(len(data)):
        z_score= (data[i] - mean_d)/std_d 
        if np.abs(z_score) > threshold:
            outliers.append(i)
    return outliers
    
def new_postprocess(dataframe,Itrain,Itest,method):
    a0,a1,c0,c1,d0,d1,e0,e1=training_process(dataframe,Itrain,method)  
    arr=[]
    for i in Itest:
        if dataframe.loc[i,'race']=='Caucasian':
            arr+=[a0*int(dataframe.loc[i,'age']<25) + c0*dataframe.loc[i,'priors_total_count'] + d0*dataframe.loc[i,'decile_score'] + e0]
        elif dataframe.loc[i,'race']!='Caucasian':
            arr+=[a1*int(dataframe.loc[i,'age']<25) + c1*dataframe.loc[i,'priors_total_count'] + d1*dataframe.loc[i,'decile_score'] + e1]
    
    normalized_arr=normalize_score1(np.array(arr))
    return normalized_arr 

def perf_measure(y_actual, y_hat):
# Output: Positive rate, False positive rate; False negative rate; True positive rate
# Positive event is being predicted not to re-offend  0
# Negative event is being predicted to re-offend  1
    TP = 0
    TN = 0
    FP = 0 # False Positive
    FN = 0 # False Negative
    
    for i in range(len(y_hat)): 
        if y_actual[i]==y_hat[i]==0:
            TP += 1
        if y_hat[i]==0 and y_actual[i]!=y_hat[i]:
            FP += 1
        if y_actual[i]==y_hat[i]==1:
            TN += 1
        if y_hat[i]==1 and y_actual[i]!=y_hat[i]:
            FN += 1
    
    #PR =(TP+FP)/len(y_hat) # Positive rate
    NR =(TN+FN)/len(y_hat) # Negative rate
    FPR=FP/(FP+TN+10**(-6)) # False positive rate # + 0.001 to avoid being divided by zero # 干了坏事没被发现
    #TPR=TP/(TP+FN+10**(-6)) # True positive rate # recall/sensitivity # 没干坏事没被冤枉
    FNR=FN/(FN+TP+10**(-6)) # False Omission Rate # 没干坏事却被冤枉
    PPV=TP/(TP+FP+10**(-6)) # positive predictive value (PPV) precision #  成功预测出好人
    NPV=TN/(TN+FN+10**(-6)) # negative predictive value (NPV) # 成功预测出坏人
    inACC=FP+FN # False prediction number
    
    return [NR,FPR,FNR,PPV,NPV,inACC]

def fair_metric(Dtest,I0_test,I1_test,score_name,thresh,base_rate):
    # base_rate override thresh
    if len(base_rate)!=0:
        th0=np.percentile(Dtest[score_name],[base_rate[0]])[0]
        th1=np.percentile(Dtest[score_name],[base_rate[1]])[0]
    else:
        th0=np.percentile(Dtest[score_name],[thresh])[0]
        th1=th0
  
    #print('is I0_test = I1_test: ',len(I0_test)==len(I1_test))
    y_actual_I0=Dtest.loc[I0_test,"is_recid"].tolist()
    y_hat_I0=(Dtest.loc[I0_test,score_name]>=th0).astype(int).tolist()
    #y_compas_I0=(compas_test.loc[I0_test,'compas_decile_score']>=th).astype(int).tolist()

    y_actual_I1=Dtest.loc[I1_test,"is_recid"].tolist()
    y_hat_I1=(Dtest.loc[I1_test,score_name]>=th1).astype(int).tolist()
    #y_compas_I1=(compas_test.loc[I1_test,'compas_decile_score']>=th).astype(int).tolist()
    
    perf_I0=perf_measure(y_actual_I0, y_hat_I0)
    perf_I1=perf_measure(y_actual_I1, y_hat_I1)

    IND=abs(perf_I0[0]-perf_I1[0])
    SP =abs(perf_I0[1]-perf_I1[1]+abs(perf_I1[2]-perf_I0[2]))  # abs(perf_I0[1]-perf_I1[1])+
    #print((perf_I0[2]-perf_I1[2]))
    SF =abs(perf_I0[3]-perf_I1[3]+abs(perf_I0[4]-perf_I1[4])) #abs(perf_I0[3]-perf_I1[3])+
    #print((perf_I0[4]-perf_I1[4]))
    INA=(perf_I0[5]+perf_I1[5])/(len(I0_test)+len(I1_test)) #Dtest.shape[0] # perdiction performance -- inaccuracy
    return [IND,SP,SF,INA]

In [6]:
level=1
filepath = 'compas-scores-two-years.csv'
nrows=1200

features=['sex', 'age', 'age_cat', 'race', 'juv_fel_count', 'juv_misd_count', 'juv_other_count','priors_count', 'c_charge_degree']
labels=["is_recid",'decile_score']
performace=['IND','SP','SF','INA','INDrw','SPrw','SFrw','INArw']

unprivileged_groups = [{'race': 1}]  
privileged_groups = [{'race': 0}]

dataframe,base_rate=preprocess(filepath,nrows,features+labels)

In [7]:
base_rate

[59.05707196029777, 46.179401993355484]

In [8]:
sum(dataframe.race=='Caucasian')

403

In [9]:
sum(dataframe.race!='Caucasian')

602

In [10]:
## =================
trials=50

methods=['COMPAS',"weighted"] #,"subgroup-fair","instant-fair"

metric=pd.DataFrame(columns=performace+['type','thresh','trial'])
for ignore in range(trials):
    Itrain,Itest=split_index(dataframe.index.tolist())
    Dtest=dataframe.loc[Itest]
    I0_test=Dtest[Dtest['race']=='Caucasian'].index
    I1_test=Dtest[Dtest['race']!='Caucasian'].index
    
    # update scores:
    Dtest['COMPAS']=Dtest['decile_score']/10
    for m in methods[1:]:
        if m not in ['subgroup-fair',"instant-fair"]:
            pp=aif_postprocess(dataframe,Itrain,Itest,m).flatten()
        else:
            pp=new_postprocess(dataframe,Itrain,Itest,m)
        i=0
        for j in Itest:
            Dtest.loc[j,m]=pp[i]
            i+=1

    len_rw=min(len(I0_test),len(I1_test))
    I0_test_rw=I0_test[range(len_rw)]
    I1_test_rw=I1_test[range(len_rw)]
    Dtest_rw=Dtest.loc[I0_test_rw.tolist()+I1_test_rw.tolist()]
    
    # update labels, based on thresholds
    #all_thresh = np.linspace(0.2, 0.8, 10)
    #for thresh in all_thresh:  
    thresh=[]
    for m in methods:
        model_perf=fair_metric(Dtest,I0_test,I1_test,m,thresh,base_rate)
        model_perf_rw=fair_metric(Dtest_rw,I0_test_rw,I1_test_rw,m,thresh,base_rate)
        metric=metric.append({'IND':model_perf[0],'SP':model_perf[1],'SF':model_perf[2],'INA':model_perf[3],
                              'INDrw':model_perf_rw[0],'SPrw':model_perf_rw[1],'SFrw':model_perf_rw[2],'INArw':model_perf_rw[3],
                              'type':m,'thresh':0.5,'trial':ignore},ignore_index=True)

metric.to_csv('data/COMPAS4_metric_AIF360.csv',index=None)

In [13]:
## ====================
trials=5

methods=['COMPAS',"fnr", "fpr", "weighted",'RejectOption','EqOddsPostprocessing'] #"fnr", "fpr", "weighted"

metric=pd.DataFrame(columns=performace+['type','thresh','trial'])

for ignore in tqdm(range(trials)):
    
    Itrain,Itest=split_index(dataframe.index.tolist())
    Dtest=dataframe.loc[Itest]
    I0_test=Dtest[Dtest['race']=='Caucasian'].index
    I1_test=Dtest[Dtest['race']!='Caucasian'].index

    # update scores:
    Dtest['COMPAS']=Dtest['decile_score']/10
    for m in methods[1:]:
        if m not in ['subgroup-fair',"instant-fair"]:
            pp=aif_postprocess(dataframe,Itrain,Itest,m).flatten()
        else:
            pp=new_postprocess(dataframe,Itrain,Itest,m)
        i=0
        for j in Itest:
            Dtest.loc[j,m]=pp[i]
            i+=1
            
    len_rw=min(len(I0_test),len(I1_test))
    I0_test_rw=I0_test[range(len_rw)]
    I1_test_rw=I1_test[range(len_rw)]
    Dtest_rw=Dtest.loc[I0_test_rw.tolist()+I1_test_rw.tolist()]

    # update labels, based on thresholds
    all_thresh = np.linspace(20, 80, 10)
    for thresh in all_thresh:  
        for m in methods:
            model_perf=fair_metric(Dtest,I0_test,I1_test,m,thresh,[]) 
            model_perf_rw=fair_metric(Dtest_rw,I0_test_rw,I1_test_rw,m,thresh,[])
            metric=metric.append({'IND':model_perf[0],'SP':model_perf[1],'SF':model_perf[2],'INA':model_perf[3],
                                  'INDrw':model_perf_rw[0],'SPrw':model_perf_rw[1],'SFrw':model_perf_rw[2],'INArw':model_perf_rw[3],
                                  'type':m,'thresh':round(thresh),'trial':ignore},ignore_index=True)

metric.to_csv('data/AIF3_metric.csv',index=None)

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [01:03<00:00, 12.80s/it]


In [12]:
## ===================
trials=50

methods=['COMPAS',"weighted"] #,"subgroup-fair","instant-fair"

metric=pd.DataFrame(columns=performace+['type','thresh','trial'])

for ignore in tqdm(range(trials)):
    
    Itrain,Itest=split_index(dataframe.index.tolist())
    Dtest=dataframe.loc[Itest]
    I0_test=Dtest[Dtest['race']=='Caucasian'].index
    I1_test=Dtest[Dtest['race']!='Caucasian'].index

    # update scores:
    Dtest['COMPAS']=Dtest['decile_score']/10
    for m in methods[1:]:
        if m not in ['subgroup-fair',"instant-fair"]:
            pp=aif_postprocess(dataframe,Itrain,Itest,m).flatten()
        else:
            pp=new_postprocess(dataframe,Itrain,Itest,m)
        i=0
        for j in Itest:
            Dtest.loc[j,m]=pp[i]
            i+=1
            
    len_rw=min(len(I0_test),len(I1_test))
    I0_test_rw=I0_test[range(len_rw)]
    I1_test_rw=I1_test[range(len_rw)]
    Dtest_rw=Dtest.loc[I0_test_rw.tolist()+I1_test_rw.tolist()]

    # update labels, based on thresholds
    all_thresh = np.linspace(20, 80, 10)
    for thresh in all_thresh:  
        for m in methods:
            model_perf=fair_metric(Dtest,I0_test,I1_test,m,thresh,[]) 
            model_perf_rw=fair_metric(Dtest_rw,I0_test_rw,I1_test_rw,m,thresh,[])
            metric=metric.append({'IND':model_perf[0],'SP':model_perf[1],'SF':model_perf[2],'INA':model_perf[3],
                                  'INDrw':model_perf_rw[0],'SPrw':model_perf_rw[1],'SFrw':model_perf_rw[2],'INArw':model_perf_rw[3],
                                  'type':m,'thresh':round(thresh),'trial':ignore},ignore_index=True)

metric.to_csv('data/COMPAS4_metric_thresh_AIF360.csv',index=None)

100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [00:08<00:00,  5.98it/s]
