In [2]:
import warnings
# Ignore every FutureWarning raised from this point on, to keep cell outputs readable.
warnings.simplefilter(action='ignore', category=FutureWarning)

In [4]:
from aif360.algorithms.postprocessing.reject_option_classification import RejectOptionClassification
from aif360.datasets import BinaryLabelDataset

In [5]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from functions import*

In [6]:
import os
# Parent of the current working directory; the notebook reads from and writes to path+'/data/...'.
path=os.path.dirname(os.getcwd())

In [7]:
# --- Experiment configuration ---------------------------------------------
K=200    # forwarded as the last argument of baseline() / partial_repair()
e=0.01   # forwarded as the second argument of baseline() / partial_repair()

var_list=['hoursperweek','age','capitalgain','capitalloss' ,'education-num']
var_dim=len(var_list)
pa='race' #'sex'
pa_dict={'Male':1,'Female':0,'White':1,'Black':0}
pd.set_option('future.no_silent_downcasting', True)

# --- Load and encode the data ---------------------------------------------
# The protected attribute becomes 'S' and the label 'Y'; rows whose protected
# attribute is not mapped to 0/1 by pa_dict are dropped; every row gets a unit
# weight 'W'.
messydata=(
    pd.read_csv(path+'/data/adult_csv.csv',usecols=var_list+[pa,'class'])
    .rename(columns={pa:'S','class':'Y'})
    .assign(S=lambda frame: frame['S'].replace(pa_dict),
            Y=lambda frame: frame['Y'].replace({'>50K':1,'<=50K':0}))
    .loc[lambda frame: (frame['S']==1)|(frame['S']==0)]
    .astype({col:'category' for col in var_list+['S','Y']})
    .assign(W=1)
)

X=messydata[var_list+['S','W']].to_numpy() # columns: [features..., S, W]
y=messydata['Y'].to_numpy()                # label Y

In [91]:
class ROCpostprocess:
    """Reject Option Classification (AIF360) post-processing around a fitted classifier.

    The ROC post-processor is fitted once, on the validation split, when the
    object is constructed; `postprocess` then applies it to another split.

    Relies on the notebook-level globals `var_list` and `var_dim`: input arrays
    must have the columns `var_list + ['S', 'W']`, and only the first `var_dim`
    columns are passed to the classifier.
    """

    # Column of predict_proba() used as the positive-label score.  Kept on the
    # class so that both buildROCusingval() and postprocess() can read it
    # (it used to be a local that was read before assignment).
    positive_index = 1

    def __init__(self,X_val,y_val,clf):
        """Store the validation split and classifier, then fit the ROC post-processor.

        Args:
            X_val: 2-D array with columns var_list + ['S', 'W'].
            y_val: 1-D array of labels for X_val.
            clf: fitted classifier exposing predict_proba().
        """
        self.X_val =X_val
        self.y_val =y_val
        self.model = clf
        # Number of leading columns that are classifier inputs.
        self.var_dim = var_dim
        self.ROC = self.buildROCusingval()

    def buildbinarydata(self,X,y):
        """Wrap a feature array and its labels into an AIF360 BinaryLabelDataset.

        'Y' is the label (1 favourable, 0 unfavourable), 'S' the protected
        attribute (1 privileged, 0 unprivileged) and 'W' the instance weight.
        """
        df=pd.DataFrame(np.concatenate((X,y.reshape(-1,1)), axis=1),columns=var_list+['S','W','Y'])
        # NOTE(review): AIF360 documents instance_weights_name as a single column
        # name; a one-element list is passed here — confirm this is intended.
        binaryLabelDataset = BinaryLabelDataset(
                            favorable_label=1,
                            unfavorable_label=0,
                            df=df[var_list+['S','W','Y']],
                            label_names=['Y'],
                            instance_weights_name=['W'],
                            protected_attribute_names=['S'],
                            privileged_protected_attributes=[np.array([1.0])],
                            unprivileged_protected_attributes=[np.array([0.])])
        return binaryLabelDataset

    def _positive_scores(self,features):
        """Return the classifier's positive-label scores as an (n, 1) column."""
        proba = self.model.predict_proba(features[:,0:self.var_dim])
        return proba[:,self.positive_index].reshape(-1,1)

    def buildROCusingval(self):
        """Fit RejectOptionClassification on the validation split and return it.

        The fairness metric is statistical parity difference, constrained to
        [-0.05, 0.05].  The selected threshold and margin are printed.
        """
        dataset_val = self.buildbinarydata(self.X_val,self.y_val)
        dataset_val_pred = dataset_val.copy(deepcopy=True)
        dataset_val_pred.scores = self._positive_scores(dataset_val.features)

        privileged_groups = [{'S': 1}]
        unprivileged_groups = [{'S': 0}]
        # Metric used (should be one of allowed_metrics)
        metric_name = "Statistical parity difference"
        # Upper and lower bound on the fairness metric used
        metric_ub = 0.05
        metric_lb = -0.05
        ROC = RejectOptionClassification(unprivileged_groups=unprivileged_groups,
                                        privileged_groups=privileged_groups,
                                        low_class_thresh=0.01, high_class_thresh=0.99,
                                        num_class_thresh=50, num_ROC_margin=10,
                                        metric_name=metric_name,
                                        metric_ub=metric_ub, metric_lb=metric_lb)
        ROC = ROC.fit(dataset_val, dataset_val_pred)
        print("Optimal classification threshold (with fairness constraints) = %.4f" % ROC.classification_threshold)
        print("Optimal ROC margin = %.4f" % ROC.ROC_margin)
        return ROC

    def postprocess(self,X_test,y_test):
        """Apply the fitted ROC post-processor to another split.

        Args:
            X_test: 2-D array with columns var_list + ['S', 'W'].
            y_test: 1-D array of labels for X_test.

        Returns:
            The DataFrame part of `convert_to_dataframe()` for the transformed
            AIF360 dataset.
        """
        dataset_test_pred = self.buildbinarydata(X_test,y_test).copy(deepcopy=True)
        dataset_test_pred.scores = self._positive_scores(X_test)
        dataset_test_pred_transf = self.ROC.predict(dataset_test_pred)
        return dataset_test_pred_transf.convert_to_dataframe()[0]

In [108]:
class Projpostprocess:
    """Coupling-based post-processing of a classifier's predictions on a test split.

    The constructor aggregates the test rows, builds the support `x_range` of
    the columns in `x_list`, the cost matrix `C` over that support and the
    distributions returned by `rdata_analysis`.  `postprocess(method)` then
    evaluates one method and returns a one-row report.

    Uses helpers star-imported from `functions` (c_generate, c_generate_higher,
    rdata_analysis, baseline, partial_repair, postprocess, postprocess_bary,
    assess_tv, DisparateImpact_postprocess).
    """

    def __init__(self,X_test,y_test,x_list,var_list,prediction_model):
        """
        Args:
            X_test: 2-D array with columns var_list + ['S', 'W'].
            y_test: 1-D array of labels for X_test.
            x_list: non-empty list of column names (a subset of var_list) that
                define the variable 'X' the coupling acts on.
            var_list: names of the classifier's input columns.
            prediction_model: fitted classifier.

        Raises:
            ValueError: if x_list is empty.
        """
        if len(x_list)==0:
            raise ValueError('x_list must contain at least one column name')

        self.model = prediction_model
        self.thresh=0.2
        self.K=200
        self.e=0.01
        self.x_list=x_list
        self.var_list=var_list
        self.var_dim=len(var_list)

        # Collapse identical (features, S, Y) rows, summing their weights W.
        df_test=pd.DataFrame(np.concatenate((X_test,y_test.reshape(-1,1)), axis=1),columns=var_list+['S','W','Y'])
        df_test=df_test.groupby(by=var_list+['S','Y'],as_index=False).sum()
        if len(x_list)>1:
            # Several columns: 'X' is the tuple of their values.
            df_test['X']=[tuple(df_test[x_list].values[r]) for r in range(df_test.shape[0])]
            self.x_range=sorted(set(df_test['X']))
            weight=list(1/(df_test[x_list].max()-df_test[x_list].min())) # because 'education-num' range from 1 to 16 while others 1 to 4
            self.C=c_generate_higher(self.x_range,weight)
        else:
            df_test['X']=df_test[x_list]
            self.x_range=sorted(set(df_test['X']))
            self.C=c_generate(self.x_range)
        self.df_test = df_test
        self.var_range=list(pd.pivot_table(df_test,index=var_list,values=['S','W','Y']).index)
        self.distribution_generator()

    def distribution_generator(self):
        """Compute the distributions over x_range and store them on the instance.

        Sets px, ptx, p0, p1 and V (column np.matrix objects) and tv_origin
        (half the L1 distance between dist['x_0'] and dist['x_1']).
        """
        n_bins=len(self.x_range)
        dist=rdata_analysis(self.df_test,self.x_range,'X')

        dist['v']=[(dist['x_0'][i]-dist['x_1'][i])/dist['x'][i] for i in range(n_bins)]

        dist['t_x']=dist['x'] # target marginal; alternative considered: dist['x_0']*0.5+dist['x_1']*0.5
        self.px=np.matrix(dist['x']).T
        self.ptx=np.matrix(dist['t_x']).T
        # p0 / p1 get a tiny additive smoothing (then renormalisation) when they
        # contain exact zeros.
        if np.any(dist['x_0']==0):
            self.p0=np.matrix((dist['x_0']+1.0e-9)/sum(dist['x_0']+1.0e-9)).T
        else:
            self.p0=np.matrix(dist['x_0']).T
        if np.any(dist['x_1']==0):
            self.p1=np.matrix((dist['x_1']+1.0e-9)/sum(dist['x_1']+1.0e-9)).T
        else:
            self.p1=np.matrix(dist['x_1']).T
        self.V=np.matrix(dist['v']).T
        self.tv_origin=sum(abs(dist['x_0']-dist['x_1']))/2

    def coupling_generator(self,method,para=None):
        """Return the coupling for `method`.

        Args:
            method: 'unconstrained', 'barycentre' or 'partial'.
            para: extra argument forwarded to partial_repair (only used by 'partial').

        Raises:
            ValueError: for any other method name.
        """
        if method == 'unconstrained':
            return baseline(self.C,self.e,self.px,self.ptx,self.K)
        if method == 'barycentre':
            return baseline(self.C,self.e,self.p0,self.p1,self.K)
        if method == 'partial':
            return partial_repair(self.C,self.e,self.px,self.ptx,self.V,para,self.K)
        raise ValueError('Unknown method: %r' % (method,))

    def postprocess(self,method,para=None):
        """Evaluate one method on the aggregated test split.

        Args:
            method: 'origin' (plain model predictions), 'unconstrained',
                'partial' or 'barycentre'.
            para: forwarded to coupling_generator.

        Returns:
            A one-row DataFrame with columns 'DI', 'f1 macro', 'f1 micro',
            'f1 weighted', 'TV distance' and 'method'.

        Raises:
            ValueError: if `method` is not one of the names above.
        """
        if method == 'origin':
            y_pred=self.model.predict(np.array(self.df_test[self.var_list]))
            tv = self.tv_origin
        elif method in ('unconstrained','partial'):
            coupling = self.coupling_generator(method,para)
            # `postprocess` here is the module-level helper, not this method.
            y_pred=postprocess(self.df_test,coupling,self.x_list,self.x_range,self.var_list,self.var_range,self.model,self.thresh)
            tv=assess_tv(self.df_test,coupling,self.x_range,self.x_list,self.var_list)
        elif method == 'barycentre':
            coupling = self.coupling_generator(method,para)
            y_pred,tv=postprocess_bary(self.df_test,coupling,self.x_list,self.x_range,self.var_list,self.var_range,self.model,self.thresh)
        else:
            # Fail loudly instead of printing and then crashing on an unbound y_pred.
            raise ValueError('Unknown method: %r' % (method,))

        di = DisparateImpact_postprocess(self.df_test,y_pred)
        f1_macro = f1_score(self.df_test['Y'], y_pred, average='macro',sample_weight=self.df_test['W'])
        f1_micro = f1_score(self.df_test['Y'], y_pred, average='micro',sample_weight=self.df_test['W'])
        f1_weighted = f1_score(self.df_test['Y'], y_pred, average='weighted',sample_weight=self.df_test['W'])

        new_row=pd.Series({'DI':di,'f1 macro':f1_macro,'f1 micro':f1_micro,'f1 weighted':f1_weighted,
                           'TV distance':tv,'method':method})
        return new_row.to_frame().T

In [9]:
# Per feature: half the L1 distance between the 'x_0' and 'x_1' distributions
# returned by rdata_analysis (presumably the two protected groups — confirm in functions.py).
tv_dist={}
for feature in var_list:
    feature_levels=list(pd.pivot_table(messydata,index=feature,values=['W'])['W'].index)
    marginals=rdata_analysis(messydata,feature_levels,feature)
    tv_dist[feature]=sum(abs(marginals['x_0']-marginals['x_1']))/2

# Keep only the features whose distance exceeds 0.08.
x_list=[feature for feature,distance in tv_dist.items() if distance>0.08]

In [94]:
# Two-stage split: 40% is held out for testing, then 30% of the remainder
# becomes the validation set (about 42% train / 18% validation / 40% test).
X_rest, X_test, y_rest, y_test = train_test_split(X, y, test_size=0.4, random_state=1)
X_train, X_val, y_train, y_val = train_test_split(X_rest, y_rest, test_size=0.3, random_state=1)

# The forest is trained on the first var_dim columns only (S and W are excluded).
clf=RandomForestClassifier(max_depth=5, random_state=0)
clf.fit(X_train[:,0:var_dim],y_train)

# Fit Reject Option Classification on the validation split, then apply it to the test split.
ROCpost = ROCpostprocess(X_val,y_val,clf)
df_test_ROC = ROCpost.postprocess(X_test,y_test)

Optimal classification threshold (with fairness constraints) = 0.2100
Optimal ROC margin = 0.0233


In [None]:
# NOTE(review): inspection cell. C, px, ptx and V are only assigned by cells further
# down, so on a fresh kernel (Restart & Run All) this line raises NameError — TODO move or remove.
C,px,ptx,V

In [110]:
# Construction check only: the instance is not bound to a name, so the cell just shows its repr.
Projpostprocess(X_test,y_test,x_list,var_list,clf)

<__main__.Projpostprocess at 0x295c9c43760>

In [80]:
# Stand-alone version of the preparation done inside Projpostprocess: it leaves
# df_test, x_range, C, var_range, dist, px, ptx, p0, p1 and V as globals.

# Collapse identical (features, S, Y) rows of the test split, summing their weights W.
df_test=pd.DataFrame(np.concatenate((X_test,y_test.reshape(-1,1)), axis=1),columns=var_list+['S','W','Y'])
df_test=df_test.groupby(by=var_list+['S','Y'],as_index=False).sum()

if len(x_list)>1:
    # Several columns: 'X' is the tuple of their values.
    df_test['X']=[tuple(row) for row in df_test[x_list].values]
    x_range=sorted(set(df_test['X']))
    weight=list(1/(df_test[x_list].max()-df_test[x_list].min())) # because 'education-num' range from 1 to 16 while others 1 to 4
    C=c_generate_higher(x_range,weight)
else:
    df_test['X']=df_test[x_list]
    x_range=sorted(set(df_test['X']))
    C=c_generate(x_range)

# Named n_bins rather than `bin` so the builtin bin() is not shadowed for later cells.
n_bins=len(x_range)
var_range=list(pd.pivot_table(df_test,index=var_list,values=['S','W','Y']).index)

dist=rdata_analysis(df_test,x_range,'X')
dist['t_x']=dist['x'] # target marginal; alternative considered: dist['x_0']*0.5+dist['x_1']*0.5
dist['v']=[(dist['x_0'][i]-dist['x_1'][i])/dist['x'][i] for i in range(n_bins)]
px=np.matrix(dist['x']).T
ptx=np.matrix(dist['t_x']).T
# p0 / p1 get a tiny additive smoothing (then renormalisation) when they contain exact zeros.
if np.any(dist['x_0']==0):
    p0=np.matrix((dist['x_0']+1.0e-9)/sum(dist['x_0']+1.0e-9)).T
else:
    p0=np.matrix(dist['x_0']).T
if np.any(dist['x_1']==0):
    p1=np.matrix((dist['x_1']+1.0e-9)/sum(dist['x_1']+1.0e-9)).T
else:
    p1=np.matrix(dist['x_1']).T
V=np.matrix(dist['v']).T

In [55]:
# Repeated comparison of the repair methods.  Each repetition draws a new
# train/validation/test split, refits the forest and appends one report row per
# method.  The globals of the last repetition (clf, df_test, x_range, var_range,
# C, px, ptx, V, ...) are reused by the cells below.
REPORT_COLUMNS=['DI','f1 macro','f1 micro','f1 weighted','TV distance','method']
N_REPEATS=2
# Threshold forwarded to postprocess()/postprocess_bary(); same value as
# Projpostprocess.thresh.  It used to be an undefined name on a fresh kernel.
thresh=0.2


def _report_row(df,y_hat,tv,method):
    """Build one report record: disparate impact, weighted F1 scores, TV distance, method label."""
    row={'DI':DisparateImpact_postprocess(df,y_hat)}
    for average in ('macro','micro','weighted'):
        row['f1 '+average]=f1_score(df['Y'], y_hat, average=average,sample_weight=df['W'])
    row['TV distance']=tv
    row['method']=method
    return row


report_rows=[]
for rep in range(N_REPEATS):
    # A different (but reproducible) seed per repetition; a constant seed made
    # every repetition identical.
    seed=1+rep
    # 20% test, then 25% of the remaining 80% as validation: 60/20/20.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=seed)

    clf=RandomForestClassifier(max_depth=5, random_state=0).fit(X_train[:,0:var_dim],y_train)

    # Collapse identical (features, S, Y) rows of the test split, summing their weights W.
    df_test=pd.DataFrame(np.concatenate((X_test,y_test.reshape(-1,1)), axis=1),columns=var_list+['S','W','Y'])
    df_test=df_test.groupby(by=var_list+['S','Y'],as_index=False).sum()

    if len(x_list)>1:
        df_test['X']=[tuple(df_test[x_list].values[r]) for r in range(df_test.shape[0])]
        x_range=sorted(set(df_test['X']))
        weight=list(1/(df_test[x_list].max()-df_test[x_list].min())) # because 'education-num' range from 1 to 16 while others 1 to 4
        C=c_generate_higher(x_range,weight)
    else:
        df_test['X']=df_test[x_list]
        x_range=sorted(set(df_test['X']))
        C=c_generate(x_range)

    n_bins=len(x_range)  # not `bin`, which would shadow the builtin
    var_range=list(pd.pivot_table(df_test,index=var_list,values=['S','W','Y']).index)
    dist=rdata_analysis(df_test,x_range,'X')
    dist['t_x']=dist['x'] # target marginal; alternative considered: dist['x_0']*0.5+dist['x_1']*0.5
    dist['v']=[(dist['x_0'][i]-dist['x_1'][i])/dist['x'][i] for i in range(n_bins)]
    px=np.matrix(dist['x']).T
    ptx=np.matrix(dist['t_x']).T
    # p0 / p1 get a tiny additive smoothing (then renormalisation) when they contain exact zeros.
    if np.any(dist['x_0']==0):
        p0=np.matrix((dist['x_0']+1.0e-9)/sum(dist['x_0']+1.0e-9)).T
    else:
        p0=np.matrix(dist['x_0']).T
    if np.any(dist['x_1']==0):
        p1=np.matrix((dist['x_1']+1.0e-9)/sum(dist['x_1']+1.0e-9)).T
    else:
        p1=np.matrix(dist['x_1']).T
    V=np.matrix(dist['v']).T

    # Couplings for the three repair methods.
    coupling_base=baseline(C,e,px,ptx,K)
    coupling_bary=baseline(C,e,p0,p1,K)
    coupling_part2=partial_repair(C,e,px,ptx,V,1.0e-3,K)

    tv_origin=sum(abs(dist['x_0']-dist['x_1']))/2
    tv_base=assess_tv(df_test,coupling_base,x_range,x_list,var_list)
    tv_part2=assess_tv(df_test,coupling_part2,x_range,x_list,var_list)

    y_pred=clf.predict(np.array(df_test[var_list]))
    y_pred_base=postprocess(df_test,coupling_base,x_list,x_range,var_list,var_range,clf,thresh)
    y_pred_bary,tv_bary=postprocess_bary(df_test,coupling_bary,x_list,x_range,var_list,var_range,clf,thresh)
    y_pred_part2=postprocess(df_test,coupling_part2,x_list,x_range,var_list,var_range,clf,thresh)

    report_rows.extend([
        _report_row(df_test,y_pred,tv_origin,'origin'),
        _report_row(df_test,y_pred_base,tv_base,'baseline'),
        _report_row(df_test,y_pred_bary,tv_bary,'barycentre'),
        _report_row(df_test,y_pred_part2,tv_part2,'partial repair2'),
    ])

# Build the frame once instead of concatenating inside the loop.
report=pd.DataFrame(report_rows,columns=REPORT_COLUMNS)

In [60]:
# Grid of candidate classification thresholds and a same-length, zero-filled
# array that will receive one score per threshold.
num_thresh = 10
class_thresh_arr = np.linspace(start=0.1, stop=0.91, num=num_thresh)
ba_arr = np.zeros(class_thresh_arr.shape)
print(class_thresh_arr)

[0.1  0.19 0.28 0.37 0.46 0.55 0.64 0.73 0.82 0.91]


In [66]:
# Recompute the partial-repair coupling (sixth argument 1.0e-3) from the globals
# C, e, px, ptx, V and K left behind by the cells above.
coupling_part2=partial_repair(C,e,px,ptx,V,1.0e-3,K)

In [67]:
# Sweep the classification threshold for the partial-repair coupling and keep
# the threshold with the highest weighted micro-averaged F1.
# NOTE: `ba_arr` keeps its original name (a later cell displays it) but it holds
# micro-F1 scores, not balanced accuracy; the printed label now says so.
for idx, thresh in enumerate(class_thresh_arr):
    # Dedicated name so the loop no longer overwrites the global y_pred_base.
    y_pred_thresh=postprocess(df_test,coupling_part2,x_list,x_range,var_list,var_range,clf,thresh)
    ba_arr[idx] = f1_score(df_test['Y'], y_pred_thresh, average='micro',sample_weight=df_test['W'])

# argmax returns the first maximum, like np.where(ba_arr == ba_arr.max())[0][0].
best_ind = int(np.argmax(ba_arr))
best_thresh = class_thresh_arr[best_ind]

print("Best micro-F1 (no fairness constraints) = %.4f" % ba_arr[best_ind])
print("Optimal classification threshold (no fairness constraints) = %.4f" % best_thresh)

Best balanced accuracy (no fairness constraints) = 0.8144
Optimal classification threshold (no fairness constraints) = 0.1900


In [68]:
# One weighted micro-F1 score per entry of class_thresh_arr, as filled by the threshold sweep.
ba_arr

array([0.734539  , 0.8143603 , 0.81366058, 0.81387588, 0.80892405,
       0.80865493, 0.8041337 , 0.80477959, 0.80472577, 0.80106572])

In [56]:
# NOTE(review): the saved output of this cell lists a 'reweighting' method that the
# experiment loop does not append to `report` — the output is stale; re-run to refresh.
report

Unnamed: 0,DI,f1 macro,f1 micro,f1 weighted,TV distance,method
0,0.353593,0.677809,0.81619,0.786812,0.19143,origin
1,0.353593,0.677809,0.81619,0.786812,0.191351,baseline
2,0.73633,0.535094,0.702298,0.679023,4.9e-05,barycentre
3,0.982744,0.627093,0.707142,0.716284,0.025413,partial repair2
4,0.679819,0.675185,0.814967,0.785183,0.150419,reweighting
5,0.44555,0.690283,0.823241,0.797115,0.195836,origin
6,0.44555,0.690283,0.823241,0.797115,0.195673,baseline
7,0.931475,0.54916,0.701222,0.687002,1e-05,barycentre
8,0.872677,0.6561,0.753431,0.752417,0.025543,partial repair2
9,0.77549,0.688621,0.822451,0.79609,0.160632,reweighting


In [51]:
# NOTE(review): second display of `report`; its saved output differs from the previous
# cell's, so it appears to come from another run — TODO keep a single, freshly executed display.
report

Unnamed: 0,DI,f1 macro,f1 micro,f1 weighted,TV distance,method
0,0.449827,0.678789,0.817833,0.789005,0.205545,origin
1,0.449827,0.678789,0.817833,0.789005,0.205545,baseline
2,1.04678,0.547657,0.641296,0.654991,7.5e-05,barycentre
3,0.75701,0.658003,0.786661,0.7674,0.012038,partial repair2
4,0.895749,0.677025,0.816853,0.787855,0.16003,reweighting
5,0.405739,0.678773,0.817219,0.787287,0.210459,origin
6,0.405739,0.678773,0.817219,0.787287,0.210459,baseline
7,0.766167,0.538426,0.652301,0.656396,0.001311,barycentre
8,0.757408,0.658621,0.786661,0.7662,0.012319,partial repair2
9,0.843715,0.674337,0.815176,0.784537,0.164602,reweighting


In [9]:
# Write the report under path+'/data', with the protected attribute name in the
# file name; the DataFrame index is not written.
report_csv=path+'/data/report_postprocess_bary'+str(pa)+'.csv'
report.to_csv(report_csv,index=None)