In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from ncpol2sdpa import *
from random import shuffle

# Clean data

In [4]:
def prepare_data(nrows): #compas-scores-two-years
    """
    Load and clean the first `nrows` rows of ``compas-scores-two-years.csv``.

    Only the CSV columns at positions 7-14 and 24 are read.  The cleaning code
    below relies on them being named ``age``, ``age_cat``, ``race``,
    ``juv_fel_count``, ``juv_misd_count``, ``priors_count`` and ``is_recid``.

    Cleaning steps:
      * keep only rows whose race is 'African-American' or 'Caucasian' and
        renumber the remaining rows 0..n-1;
      * encode race as 1 (African-American) / 0 (Caucasian);
      * add ``priors_total_count`` = (juv_fel_count + juv_misd_count + priors_count) / 3;
      * add the age indicators ``less25`` (age < 25) and ``over45`` (age > 45)
        as 0.0/1.0 floats;
      * drop the raw age and prior-count columns.

    Side effects: writes the cleaned table to ``COMPAS3_CLEAN.csv`` (no index
    column) and prints the number of remaining rows.

    Returns
    -------
    (DataFrame, list)
        The cleaned frame and ``[base0, base1]`` where ``base_s`` is
        ``(1 - P(is_recid = 1 | race = s)) * 100``.

    Raises
    ------
    ZeroDivisionError
        If one of the two race groups has no rows after filtering.
    """
    # Load the data.
    compas_ori = pd.read_csv("compas-scores-two-years.csv", nrows=nrows, usecols=[*range(7, 15), 24])

    # Clean data.
    # reset_index returns a fresh frame, so the column assignments below never
    # write into a view of the raw table (no SettingWithCopyWarning).
    keep = compas_ori["race"].isin(['African-American', 'Caucasian'])
    compas_ori = compas_ori[keep].reset_index(drop=True)

    # race 1 -> African-American
    # race 0 -> Caucasian
    compas_ori["race"] = compas_ori["race"].map({'Caucasian': 0, 'African-American': 1})

    # total number of previous convictions (juv_other_count is deliberately left out),
    # scaled by 1/3 exactly as in the original per-row computation
    compas_ori["priors_total_count"] = (
        compas_ori["juv_fel_count"] + compas_ori["juv_misd_count"] + compas_ori["priors_count"]
    ) / 3

    # age less than 25 / age greater than 45 (both 0 for ages in [25, 45])
    compas_ori["less25"] = (compas_ori["age"] < 25).astype(float)
    compas_ori["over45"] = (compas_ori["age"] > 45).astype(float)

    compas_ori = compas_ori.drop(['age', 'age_cat', 'juv_fel_count', 'juv_misd_count', 'priors_count'], axis=1)
    compas_ori.to_csv('COMPAS3_CLEAN.csv', index=False)
    print(compas_ori.shape[0])

    def _base_rate(group):
        # base rate: 1-P(is_recid=1|S=s), in percent
        in_group = compas_ori["race"] == group
        n_recid = int((in_group & (compas_ori["is_recid"] == 1)).sum())
        return (1 - n_recid / int(in_group.sum())) * 100

    return compas_ori, [_base_rate(0), _base_rate(1)]

# Split dataset

In [5]:
def split_data(df):
    """
    Randomly split `df` 80/20 into a training part and a test part.

    The rows are addressed by the labels 0..len(df)-1, so `df` is expected to
    carry that default integer index.  The original labels are kept in the
    returned frames (they are not renumbered).

    Returns
    -------
    list
        ``[I_train, I_test, df_train, df_test]`` - the shuffled row labels of
        each part followed by the corresponding sub-frames.
    """
    n_rows = df.shape[0]
    n_train = int(n_rows * 0.8)  # 8:2 split

    # Shuffle the row labels once, then cut the permutation in two.
    order = list(range(n_rows))
    shuffle(order)
    I_train, I_test = order[:n_train], order[n_train:]

    return [I_train, I_test, df.loc[I_train], df.loc[I_test]]

# Training process

In [301]:
def training_process(train,I_train, model_type):
    if model_type=='subgroup-fair':
        # subgroup fair
        # Training Model D.No time series; 2 features; age range
        #I_train = train.shape[0]
        I0_train=train[train['race']==0].index
        I1_train=train[train['race']==1].index

        # Apply Subgroup-Fair
        # Parameters
        level = 1

        # Decision Variables
        a = generate_operators("a", n_vars=2, hermitian=True, commutative=False) # age < 25
        #b = generate_operators("b", n_vars=2, hermitian=True, commutative=False) # age > 45
        c = generate_operators("c", n_vars=2, hermitian=True, commutative=False) # total number of previous convictions
        d = generate_operators("d", n_vars=2, hermitian=True, commutative=False)
        e = generate_operators("e", n_vars=2, hermitian=True, commutative=False)
        #f = generate_operators("f", n_vars=1, hermitian=True, commutative=False)[0]
        z = generate_operators("z", n_vars=3, hermitian=True, commutative=True)

        # Constraints
        ine1 = [z[0]+train['is_recid'][i] - a[0]*train['less25'][i] - c[0]*train['priors_total_count'][i] - d[0]*train['decile_score'][i] + e[0] for i in I0_train]
        ine2 = [z[0]-train['is_recid'][i] + a[0]*train['less25'][i] + c[0]*train['priors_total_count'][i] + d[0]*train['decile_score'][i] + e[0] for i in I0_train]
        ine3 = [z[0]+train['is_recid'][i] - a[1]*train['less25'][i] - c[1]*train['priors_total_count'][i] - d[1]*train['decile_score'][i] + e[1] for i in I1_train]
        ine4 = [z[0]-train['is_recid'][i] + a[1]*train['less25'][i] + c[1]*train['priors_total_count'][i] + d[1]*train['decile_score'][i] + e[1] for i in I1_train]
        max1 =[z[1]-sum((train['is_recid'][i]-a[0]*train['less25'][i] - c[0]*train['priors_total_count'][i] - d[0]*train['decile_score'][i] + e[0])**2 for i in I0_train)/len(I0_train)]
        max2 =[z[2]-sum((train['is_recid'][i]-a[1]*train['less25'][i] - c[1]*train['priors_total_count'][i] - d[1]*train['decile_score'][i] + e[1])**2 for i in I1_train)/len(I1_train)]
        
        #cal1=[0.01*(sum(a[0]*train['less25'][i] + c[0]*train['priors_total_count'][i] + d[0]*train['decile_score'][i] + e[0] for i in I_train)- sum(train['is_recid'][i] for i in I_train))]
        #cal2=[0.01*(-sum(a[0]*train['less25'][i] + c[0]*train['priors_total_count'][i] + d[0]*train['decile_score'][i] + e[0] for i in I_train)+ sum(train['is_recid'][i] for i in I_train))]

        ine_D=ine1+ine2+ine3+ine4+max1+max2

        # Objective
        obj_D = z[0] + z[1] + z[2] + 0.5*(z[2]-z[1])
        #z[0] + 5*z[1] #+ sum(a[0]*train['less25'][i] + c[0]*train['priors_total_count'][i] + d[0]*train['decile_score'][i] + e[0] for i in I_train)- sum(train['is_recid'][i] for i in I_train)
        #+ e[0]**2 + e[1]**2 

        # Solve the fair NCPOP B
        sdp_D = SdpRelaxation(variables = flatten([a,c,d,e,z]), verbose = 2)
        sdp_D.get_relaxation(level, objective=obj_D, inequalities=ine_D)
        sdp_D.solve(solver='sdpa', solverparameters={"executable":"sdpa_gmp","executable": "C:\\Users\\zhouq\\Documents\\sdpa7-windows\\sdpa.exe"})
        print(sdp_D.primal, sdp_D.dual, sdp_D.status)

        return [sdp_D[a[0]],sdp_D[a[1]],sdp_D[c[0]],sdp_D[c[1]],sdp_D[d[0]],sdp_D[d[1]],sdp_D[e[0]],sdp_D[e[1]]]

    elif model_type=='instant-fair':
        # instant fair
        # Training Model D.No time series; 2 features; age range
        #I_train = train.shape[0]
        I0_train=train[train['race']==0].index
        I1_train=train[train['race']==1].index

        # Apply Subgroup-Fair
        # Parameters
        level = 1

        # Decision Variables
        a = generate_operators("a", n_vars=2, hermitian=True, commutative=False) # age < 25
        #b = generate_operators("b", n_vars=2, hermitian=True, commutative=False) # age > 45
        c = generate_operators("c", n_vars=2, hermitian=True, commutative=False) # total number of previous convictions
        d = generate_operators("d", n_vars=2, hermitian=True, commutative=False)
        e = generate_operators("e", n_vars=2, hermitian=True, commutative=False)
        #f = generate_operators("f", n_vars=1, hermitian=True, commutative=False)[0]
        z = generate_operators("z", n_vars=2, hermitian=True, commutative=True)

        # Constraints
        ine1 = [(z[0]+train['is_recid'][i] - a[0]*train['less25'][i] - c[0]*train['priors_total_count'][i] - d[0]*train['decile_score'][i] + e[0])/len(I0_train) for i in I0_train]
        ine2 = [(z[0]-train['is_recid'][i] + a[0]*train['less25'][i] + c[0]*train['priors_total_count'][i] + d[0]*train['decile_score'][i] + e[0])/len(I0_train) for i in I0_train]
        ine3 = [(z[0]+train['is_recid'][i] - a[1]*train['less25'][i] - c[1]*train['priors_total_count'][i] - d[1]*train['decile_score'][i] + e[1])/len(I1_train) for i in I1_train]
        ine4 = [(z[0]-train['is_recid'][i] + a[1]*train['less25'][i] + c[1]*train['priors_total_count'][i] + d[1]*train['decile_score'][i] + e[1])/len(I1_train) for i in I1_train]
        max0 = [(z[1]+(train['is_recid'][i]-a[0]*train['less25'][i] - c[0]*train['priors_total_count'][i] - d[0]*train['decile_score'][i] + e[0]))/len(I0_train) for i in I0_train]
        max1 = [(z[1]-(train['is_recid'][i]-a[1]*train['less25'][i] - c[1]*train['priors_total_count'][i] - d[1]*train['decile_score'][i] + e[1]))/len(I1_train) for i in I1_train]
        #cal1=[0.01*(sum(a[0]*train['less25'][i] + c[0]*train['priors_total_count'][i] + d[0]*train['decile_score'][i] + e[0] for i in I_train)- sum(train['is_recid'][i] for i in I_train))]
        #cal2=[0.01*(-sum(a[0]*train['less25'][i] + c[0]*train['priors_total_count'][i] + d[0]*train['decile_score'][i] + e[0] for i in I_train)+ sum(train['is_recid'][i] for i in I_train))]

        ine_D=ine1+ine2+ine3+ine4+max0+max1

        # Objective
        obj_D = z[0] + z[1] 
        #z[0]/len(I1_train) + z[1]/len(I1_train) #+ sum(a[0]*train['less25'][i] + c[0]*train['priors_total_count'][i] + d[0]*train['decile_score'][i] + e[0] for i in I_train)- sum(train['is_recid'][i] for i in I_train)
        #+ e[0]**2 + e[1]**2 

        # Solve the fair NCPOP B
        sdp_D = SdpRelaxation(variables = flatten([a,c,d,e,z]), verbose = 2)
        sdp_D.get_relaxation(level, objective=obj_D, inequalities=ine_D)
        sdp_D.solve(solver='sdpa', solverparameters={"executable":"sdpa_gmp","executable": "C:\\Users\\zhouq\\Documents\\sdpa7-windows\\sdpa.exe"})
        print(sdp_D.primal, sdp_D.dual, sdp_D.status)

        return [sdp_D[a[0]],sdp_D[a[1]],sdp_D[c[0]],sdp_D[c[1]],sdp_D[d[0]],sdp_D[d[1]],sdp_D[e[0]],sdp_D[e[1]]]
 
    else: 
        print('This model does not exist')
        return;

# Extract scores

In [153]:
#param=training_process(train,I_train)
def extract_scores(test,I_test,param,model_type):
    """
    Score every test row with the fitted per-group linear model and store the
    normalised score in a column named after the model.

    Parameters
    ----------
    test : DataFrame
        Test rows; must contain ``race``, ``less25``, ``priors_total_count``
        and ``decile_score``.  It is modified in place.
    I_test : list
        Row labels of `test` to score, in the order they are processed.
    param : sequence
        ``[a0, a1, c0, c1, d0, d1, e0, e1]`` as returned by training_process.
    model_type : str
        Name of the column that receives the normalised scores.

    Returns
    -------
    DataFrame
        The same `test` object, with column `model_type` filled in.
    """
    # (less25, priors, decile, constant) weights for race 0 and for every other race value
    weights_race0 = (param[0], param[2], param[4], param[6])
    weights_other = (param[1], param[3], param[5], param[7])

    raw_scores = []
    for row in I_test:
        if test.loc[row, 'race'] == 0:
            w_age, w_prior, w_decile, const = weights_race0
        else:
            w_age, w_prior, w_decile, const = weights_other
        raw_scores.append(
            w_age*test['less25'][row] + w_prior*test['priors_total_count'][row] + w_decile*test['decile_score'][row] + const
        )

    scaled_scores = normalize_score1(np.array(raw_scores))

    # Write the scores back one row at a time, in the same order they were computed.
    for position, row in enumerate(I_test):
        test.loc[row, model_type] = scaled_scores[position]

    return test

In [154]:
def detect_outliers(data,threshold):
    """
    Return the positions of the outliers in `data`.

    An element is an outlier when the absolute value of its z-score
    (distance from the mean in units of the standard deviation) is strictly
    greater than `threshold`.  The result is a list of integer positions in
    increasing order.
    """
    center = np.mean(data)
    spread = np.std(data)
    return [
        position
        for position in range(len(data))
        if np.abs((data[position] - center)/spread) > threshold
    ]

In [155]:
def normalize_score1(arr):
    """
    Min-max normalise scores to the integers 0..10, ignoring outliers when
    choosing the range.

    The minimum and maximum are taken over the values that are NOT flagged by
    ``detect_outliers(arr, 3)``.  Every value (outliers included) is then
    mapped to ``int(10 * (x - min) / (max - min))`` and clipped to [0, 10], so
    outliers end up at 0 (if too small) or 10 (if too large).

    If all non-outlier values are equal the range is zero and the formula is
    undefined (the original code failed inside ``int()`` in that case).  Here
    values above that common value map to 10 and all others to 0.

    Parameters
    ----------
    arr : array-like of float
        Raw scores.

    Returns
    -------
    numpy.ndarray of int
        Normalised scores, same length and order as `arr`.

    Raises
    ------
    ValueError
        If `arr` is empty or contains NaN/inf.
    """
    arr = np.asarray(arr, dtype=float)
    if not np.isfinite(arr).all():
        # Keep failing loudly instead of turning NaN/inf into arbitrary integers.
        raise ValueError("scores must be finite to be normalized")

    outlierPosition = detect_outliers(arr, 3)
    arr_clean = np.delete(arr, outlierPosition)
    arr_min = np.min(arr_clean)
    arr_max = np.max(arr_clean)
    span = arr_max - arr_min

    if span == 0:
        # Degenerate range: only outliers can differ from the common value.
        return np.where(arr > arr_max, 10, 0).astype(int)

    # Same arithmetic as int(10*float(x - arr_min)/(arr_max - arr_min)) per element:
    # truncate toward zero, then clip.  Clipping before the integer cast keeps
    # extreme outliers from overflowing the integer type.
    scaled = np.trunc(10*(arr - arr_min)/span)
    return np.clip(scaled, 0, 10).astype(int)

In [156]:
# optional normalization method
def normalize_score(arr):
    """
    Alternative normalisation: divide the scores by their vector 10-norm and
    limit the result to the interval [0, 1].

    Values that are negative after scaling become 0; values above 1 become 1.
    """
    scaled = arr/np.linalg.norm(arr,ord=10)
    return np.clip(scaled, 0, 1)

# Testing process

In [157]:
def perf_measure(y_actual, y_hat):
    """
    Confusion-matrix statistics for binary labels, with label 0 as the
    "positive" class.

    Positive event: predicted NOT to re-offend (0).
    Negative event: predicted to re-offend (1).

    Parameters
    ----------
    y_actual, y_hat : sequences indexable by 0..len(y_hat)-1
        True and predicted labels.  Predictions other than 0 or 1 are not counted.

    Returns
    -------
    list
        ``[NR, FPR, FNR, PPV, NPV, inACC]``:
        NR    - share of samples predicted 1 (negative rate);
        FPR   - false positive rate, FP / (FP + TN): re-offended but predicted not to;
        FNR   - false negative rate, FN / (FN + TP): did not re-offend but predicted to;
        PPV   - positive predictive value (precision), TP / (TP + FP);
        NPV   - negative predictive value, TN / (TN + FN);
        inACC - number of wrong predictions, FP + FN.
        The four ratios carry a 1e-6 term in the denominator to avoid division by zero.
    """
    EPS = 10**(-6)  # keeps the ratio denominators non-zero

    TP = TN = 0
    FP = FN = 0  # false positives / false negatives

    for i in range(len(y_hat)):
        predicted = y_hat[i]
        correct = y_actual[i] == predicted
        if predicted == 0:
            if correct:
                TP += 1
            else:
                FP += 1
        elif predicted == 1:
            if correct:
                TN += 1
            else:
                FN += 1

    n_samples = len(y_hat)
    NR = (TN+FN)/n_samples     # negative rate
    FPR = FP/(FP+TN+EPS)       # false positive rate
    FNR = FN/(FN+TP+EPS)       # false negative rate
    PPV = TP/(TP+FP+EPS)       # positive predictive value (precision)
    NPV = TN/(TN+FN+EPS)       # negative predictive value
    inACC = FP+FN              # number of wrong predictions

    return [NR,FPR,FNR,PPV,NPV,inACC]
# (len(y_hat)-inAcc)/len(y_hat)
#FP/(FP+TP),FN/(FN+TN),
    #return(TP, FP, TN, FN)

In [158]:
def fair_metric(test, I0_test, I1_test, threshold, score_name):
    """
    Evaluate the fairness of a score column using three criteria
    (independence, separation, sufficiency) plus overall inaccuracy.

    Parameters
    ----------
    test : DataFrame
        Must contain ``is_recid`` and the column `score_name`.
    I0_test, I1_test : index-like
        Row labels of the race-0 and race-1 groups.
    threshold : sequence of two numbers
        Percentiles (0-100); ``threshold[0]`` / ``threshold[1]`` give the score
        cut-off used for group 0 / group 1.  Both percentiles are taken over the
        scores of the whole `test` frame.
    score_name : str
        Column holding the scores; a score >= cut-off is a prediction of 1.

    Returns
    -------
    list
        ``[IND, SP, SF, INA]``:
        IND - absolute gap between the groups' negative rates;
        SP  - mean of the absolute gaps in false positive and false negative rate;
        SF  - absolute gap in negative predictive value;
        INA - wrong predictions in both groups divided by the number of rows of `test`.
    """
    all_scores = test[score_name]
    cutoffs = [np.percentile(all_scores, [pct])[0] for pct in (threshold[0], threshold[1])]

    per_group = []
    for rows, cutoff in zip((I0_test, I1_test), cutoffs):
        actual = test.loc[rows, 'is_recid'].tolist()
        predicted = (test.loc[rows, score_name] >= cutoff).astype(int).tolist()
        per_group.append(perf_measure(actual, predicted))
    perf_I0, perf_I1 = per_group

    IND = abs(perf_I0[0]-perf_I1[0])
    fpr_gap = abs(perf_I0[1]-perf_I1[1])
    fnr_gap = abs(perf_I1[2]-perf_I0[2])
    SP = (fpr_gap+fnr_gap)/2
    SF = (abs(perf_I0[4]-perf_I1[4]))
    INA = (perf_I0[5]+perf_I1[5])/test.shape[0]  # prediction performance -- inaccuracy
    return [IND,SP,SF,INA]

#AUC= auc(FP,TP)

# 'compas_decile_score' 'our_score_D

In [118]:
def improvement(test,I_test,threshold,model_type):
    """
    Compare the fairness of the post-processed scores with the original
    COMPAS ``decile_score``.

    Parameters
    ----------
    test : DataFrame
        Test rows containing ``race``, ``decile_score`` and the column `model_type`.
    I_test : list
        Unused; kept for call compatibility.
    threshold : sequence of two numbers
        Per-group percentiles forwarded to fair_metric.
    model_type : str
        Name of the column holding our scores.

    Returns
    -------
    list
        ``[metric_D, metric_compas, improve_vector]`` where the first two are the
        fair_metric results for our scores and for ``decile_score``, and
        ``improve_vector[i] = (compas_i - ours_i) / (compas_i + 1e-6)``.
    """
    race = test['race']
    I0_test = test[race==0].index
    I1_test = test[race==1].index

    metric_D = fair_metric(test, I0_test, I1_test, threshold, model_type)
    metric_compas = fair_metric(test, I0_test, I1_test, threshold, 'decile_score')

    # relative improvement; the small constant guards against a zero baseline
    improve_vector = []
    for i in range(4):
        baseline, ours = metric_compas[i], metric_D[i]
        improve_vector.append((baseline-ours)/(baseline+10**(-6)))

    return [metric_D,metric_compas,improve_vector]

# The algorithm

In [6]:
def Post_Processing(compas,threshold,model_types):
    """
    Run one complete experiment on a fresh random train/test split.

    Steps:
      1. split the whole dataset into a training set and a test set;
      2. for each model, generate scores by a training process;
      3. normalise the scores using min-max normalisation;
      4. compute the fairness measures from our normalised scores;
      5. compare our scores with the COMPAS score on those measures.

    Parameters
    ----------
    compas : DataFrame
        Cleaned dataset (as returned by prepare_data).
    threshold : sequence of two numbers
        Per-group percentiles forwarded to the fairness evaluation.
    model_types : iterable of str
        Model names to train; each name also becomes a score column of the test set.

    Returns
    -------
    dict
        Maps each model name to the result of ``improvement`` for that model.
    """
    I_train, I_test, train, scored_test = split_data(compas)

    results = {}
    for name in model_types:
        fitted = training_process(train, I_train, name)
        # extract_scores adds the column `name`; later models see earlier columns too
        scored_test = extract_scores(scored_test, I_test, fitted, name)
        results[name] = improvement(scored_test, I_test, threshold, name)

    return results

In [173]:
# Average of the two per-group base rates returned by prepare_data.
# NOTE(review): `base_rate` is only assigned in the driver cell further down
# (`data,base_rate=prepare_data(1200)`), so this cell fails on a fresh
# Restart & Run All; it should be moved below that cell.
np.mean(base_rate)

52.61823697682662

In [308]:
# Repeat the experiment on 50 random train/test splits and collect, per run:
#   metric  - the raw fairness measures of every model plus one COMPAS baseline row;
#   improve - the relative improvement of every model over COMPAS.
# The per-group base rates are used as the percentile thresholds.
# NOTE(review): no random seed is set, so the splits (and all numbers below)
# change from run to run.
#threshold=50 # use the 50th percentage of scores
data,base_rate=prepare_data(1200)
model_types=['subgroup-fair','instant-fair']
result_columns=['IND', 'SP', 'SF','INA','type']

# Rows are gathered in plain lists and turned into DataFrames once at the end:
# DataFrame.append was removed in pandas 2.0 and re-copies the frame on every call.
metric_rows=[]
improve_rows=[]
for run in range(50):
    model_perf=Post_Processing(data,base_rate,model_types)
    for position,(model_name,perf) in enumerate(model_perf.items()):
        model_metric,compas_metric,gain=perf
        metric_rows.append(dict(zip(result_columns,[*model_metric[:4],model_name])))
        if position == 0:
            # the COMPAS baseline is identical for every model of a run: store it once
            metric_rows.append(dict(zip(result_columns,[*compas_metric[:4],'COMPAS'])))
        print(model_name,': ',gain)
        improve_rows.append(dict(zip(result_columns,[*gain[:4],model_name])))

metric=pd.DataFrame(metric_rows,columns=result_columns)
improve=pd.DataFrame(improve_rows,columns=result_columns)

1005
The problem has 3 commuting, 8 noncommuting Hermitian variables
Calculating block structure...
Estimated number of SDP variables: 77
Generating moment matrix...
Reduced number of SDP variables: 77 77 (done: 101.30%, ETA 00:00:-0.0)
[KProcessing 1610/1610 constraints...
1.2770501481121816 1.2770500347968918 optimal
The problem has 2 commuting, 8 noncommuting Hermitian variables
Calculating block structure...
Estimated number of SDP variables: 65
Generating moment matrix...
Reduced number of SDP variables: 65 65 (done: 101.54%, ETA 00:00:-0.0)
[KProcessing 2412/2412 constraints...
1.8181818197220423 1.8181815583093377 unknown
0
subgroup-fair :  [-0.3323492382035135, -0.33498209664652323, 0.17100658858942996, 0.0]
1
instant-fair :  [0.3694733552368841, 0.3093863477411962, -1.0162588198323854, -0.4999987115417818]
The problem has 3 commuting, 8 noncommuting Hermitian variables
Calculating block structure...
Estimated number of SDP variables: 77
Generating moment matrix...
Reduced nu

The problem has 3 commuting, 8 noncommuting Hermitian variables
Calculating block structure...
Estimated number of SDP variables: 77
Generating moment matrix...
Reduced number of SDP variables: 77 77 (done: 101.30%, ETA 00:00:-0.0)
[KProcessing 1610/1610 constraints...
1.2450050366934273 1.2450046009579367 unknown
The problem has 2 commuting, 8 noncommuting Hermitian variables
Calculating block structure...
Estimated number of SDP variables: 65
Generating moment matrix...
Reduced number of SDP variables: 65 65 (done: 101.54%, ETA 00:00:-0.0)
[KProcessing 2412/2412 constraints...
1.818181836946394 1.8181768944723198 unknown
0
subgroup-fair :  [0.37116630692986147, 0.3793427363855262, -0.7894105449549702, 0.025640959566245246]
1
instant-fair :  [0.9999973399748004, 0.9999972749375329, -1.3738030893822193, -0.24358911587932963]
The problem has 3 commuting, 8 noncommuting Hermitian variables
Calculating block structure...
Estimated number of SDP variables: 77
Generating moment matrix...


The problem has 3 commuting, 8 noncommuting Hermitian variables
Calculating block structure...
Estimated number of SDP variables: 77
Generating moment matrix...
Reduced number of SDP variables: 77 77 (done: 101.30%, ETA 00:00:-0.0)
[KProcessing 1610/1610 constraints...
1.253097558314439 1.253096694829034 unknown
The problem has 2 commuting, 8 noncommuting Hermitian variables
Calculating block structure...
Estimated number of SDP variables: 65
Generating moment matrix...
Reduced number of SDP variables: 65 65 (done: 101.54%, ETA 00:00:-0.0)
[KProcessing 2412/2412 constraints...
1.8181818191163757 1.8181816537710627 optimal
0
subgroup-fair :  [0.3687306341296596, 0.4738546843683637, 0.5660525876847107, 0.07575734504202494]
1
instant-fair :  [0.9999974797596367, 0.9999973404586757, 0.38766267974122365, -0.6212102293446047]
The problem has 3 commuting, 8 noncommuting Hermitian variables
Calculating block structure...
Estimated number of SDP variables: 77
Generating moment matrix...
Reduc

The problem has 3 commuting, 8 noncommuting Hermitian variables
Calculating block structure...
Estimated number of SDP variables: 77
Generating moment matrix...
Reduced number of SDP variables: 77 77 (done: 101.30%, ETA 00:00:-0.0)
[KProcessing 1610/1610 constraints...
1.2781554618194013 1.2781544683966166 unknown
The problem has 2 commuting, 8 noncommuting Hermitian variables
Calculating block structure...
Estimated number of SDP variables: 65
Generating moment matrix...
Reduced number of SDP variables: 65 65 (done: 101.54%, ETA 00:00:-0.0)
[KProcessing 2412/2412 constraints...
1.8181818801860459 1.8181750560473777 unknown
0
subgroup-fair :  [0.18559718802937739, 0.24524384206029778, 0.6784327325788556, 0.030302938016810013]
1
instant-fair :  [0.9999976491022851, 0.9999973367107873, -8.026528883048236, -0.6212102293446047]
The problem has 3 commuting, 8 noncommuting Hermitian variables
Calculating block structure...
Estimated number of SDP variables: 77
Generating moment matrix...
R

The problem has 3 commuting, 8 noncommuting Hermitian variables
Calculating block structure...
Estimated number of SDP variables: 77
Generating moment matrix...
Reduced number of SDP variables: 77 77 (done: 101.30%, ETA 00:00:-0.0)
[KProcessing 1610/1610 constraints...
1.2533716048130596 1.25337083541968 unknown
The problem has 2 commuting, 8 noncommuting Hermitian variables
Calculating block structure...
Estimated number of SDP variables: 65
Generating moment matrix...
Reduced number of SDP variables: 65 65 (done: 101.54%, ETA 00:00:-0.0)
[KProcessing 2412/2412 constraints...
1.8181818607059768 1.8181737164241705 unknown
0
subgroup-fair :  [0.2557241015721476, 0.31074521918314074, -0.11295790796094997, 0.0]
1
instant-fair :  [0.679386419102123, 0.57403480906472, 0.04198017578022727, -0.2911384997615387]
The problem has 3 commuting, 8 noncommuting Hermitian variables
Calculating block structure...
Estimated number of SDP variables: 77
Generating moment matrix...
Reduced number of SDP

In [309]:
# Fairness measures collected by the driver loop: per run, one row for each
# trained model plus one 'COMPAS' baseline row.
metric

Unnamed: 0,IND,SP,SF,INA,type
0,0.224796,2.250624e-01,0.026760,0.388060,subgroup-fair
1,0.168721,1.685880e-01,0.032280,0.388060,COMPAS
2,0.106383,1.164289e-01,0.065087,0.582090,instant-fair
3,0.320679,2.491259e-01,0.101351,0.268657,subgroup-fair
4,0.463536,3.965618e-01,0.041667,0.273632,COMPAS
...,...,...,...,...,...
145,0.446526,3.902737e-01,0.095850,0.348259,COMPAS
146,0.000000,5.660377e-10,0.233895,0.512438,instant-fair
147,0.373456,3.367720e-01,0.177340,0.348259,subgroup-fair
148,0.428494,3.908525e-01,0.143382,0.343284,COMPAS


In [310]:
# Relative improvement over the COMPAS score collected by the driver loop:
# one row per trained model per run.
improve

Unnamed: 0,IND,SP,SF,INA,type
0,-0.332349,-0.334982,0.171007,0.000000,subgroup-fair
1,0.369473,0.309386,-1.016259,-0.499999,instant-fair
2,0.308189,0.371785,-1.432397,0.018182,subgroup-fair
3,0.786636,0.717118,-4.326723,-0.872724,instant-fair
4,0.075075,0.123846,-9.999138,0.081081,subgroup-fair
...,...,...,...,...,...
95,0.999998,0.999998,-2.149553,-0.120482,instant-fair
96,0.000000,0.000000,0.000000,0.000000,subgroup-fair
97,0.999998,0.999997,-1.440206,-0.471427,instant-fair
98,0.128445,0.138365,-0.236830,-0.014493,subgroup-fair


In [311]:
# Mean relative improvement of the subgroup-fair model over all runs.
# Only the four metric columns are averaged: calling .mean() on a frame that
# still contains the string column 'type' raises TypeError on pandas >= 2.0.
improve.loc[improve['type']=='subgroup-fair', ['IND', 'SP', 'SF', 'INA']].astype(float).mean()

IND    0.173813
SP     0.216605
SF    -0.482263
INA    0.020614
dtype: float64

In [312]:
# Mean relative improvement of the instant-fair model over all runs.
# Only the four metric columns are averaged: calling .mean() on a frame that
# still contains the string column 'type' raises TypeError on pandas >= 2.0.
improve.loc[improve['type']=='instant-fair', ['IND', 'SP', 'SF', 'INA']].astype(float).mean()

IND    0.957417
SP     0.946557
SF    -2.062031
INA   -0.503038
dtype: float64

In [313]:
# Save both result tables (raw fairness measures and relative improvements) as CSV files.
metric.to_csv('COMPAS3_metric.csv',index=None)
improve.to_csv('COMPAS3_improve.csv',index=None)