# Algorithm 4 implementation

## Import essential packages

In [None]:
import warnings

import time,sys
import zipfile
import random
import pandas as pd
import numpy as np
import tensorflow as tf
from google.colab import drive
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from tensorflow import keras
from keras.layers import Dense, Input
from tensorflow.keras import Model

warnings.filterwarnings('ignore')

## Load Database and Data Preprocessing


In [None]:
!mkdir data
# if using local data, this code chunk should be commented
drive.mount('/content/drive')

mkdir: cannot create directory ‘data’: File exists
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Wall-clock start; the final cell of the notebook reports the total running time.
start = time.time()
data = pd.read_csv('drive/MyDrive/5243Project4/compas-scores-two-years.csv')

# Keep only African-American and Caucasian defendants and encode race as 0/1.
# .copy() detaches the filtered frame, and .loc[mask, column] writes into `data`
# itself; the chained form `data[col].loc[mask] = v` assigns to a temporary when
# pandas copy-on-write is active, leaving the strings untouched.
data = data[(data['race'] == 'African-American') | (data['race'] == 'Caucasian')].copy()
data.loc[data['race'] == 'Caucasian', 'race'] = 1
data.loc[data['race'] == 'African-American', 'race'] = 0

# Drop every column in which more than 15% of the values are missing.
nan = data.isnull().sum() / len(data)
nan = nan[nan > 0.15].sort_values()
nan_var = list(nan.index)
data = data.drop(columns=nan_var)


def _log_hours(begin, finish):
    """Return log(1 + whole hours elapsed) between two datetime Series."""
    # total_seconds() // 3600 floors to whole hours and keeps NaT as NaN.
    # `.astype('timedelta64[h]')` is avoided because hour resolution is not a
    # supported timedelta dtype in pandas >= 2.0.
    whole_hours = (finish - begin).dt.total_seconds() // 3600
    return np.log(whole_hours + 1)


# Log-transformed duration features: jail stay, custody period, follow-up window.
data['c_jail_in'] = pd.to_datetime(data['c_jail_in'])
data['c_jail_out'] = pd.to_datetime(data['c_jail_out'])
data['los'] = _log_hours(data['c_jail_in'], data['c_jail_out'])
data['in_custody'] = pd.to_datetime(data['in_custody'])
data['out_custody'] = pd.to_datetime(data['out_custody'])
data['custody'] = _log_hours(data['in_custody'], data['out_custody'])
data['lasts'] = np.log(data['end'] - data['start'] + 1)
data['c_days_from_compas'] = np.log(data['c_days_from_compas'] + 1)

# Remove identifiers, free-text/string columns and columns that duplicate others.
useless_var = ['id', 'name', 'first', 'last', 'compas_screening_date', 'dob', 'age_cat', 'days_b_screening_arrest',
               'c_jail_in', 'c_jail_out', 'c_case_number', 'c_charge_desc', 'is_recid',
               'type_of_assessment', 'screening_date', 'v_type_of_assessment',
               'v_screening_date', 'in_custody', 'out_custody', 'score_text', 'v_score_text',
               'decile_score.1', 'v_decile_score', 'priors_count.1', 'start', 'end']
data = data.drop(columns=useless_var)

# log(0) yields -inf for zero-length durations; discard those rows.
for duration_col in ('los', 'custody', 'lasts'):
    data = data[data[duration_col] != float('-inf')]

# Binary-encode the two remaining string features (sex, c_charge_degree).
data.loc[data['sex'] == 'Male', 'sex'] = 1
data.loc[data['sex'] == 'Female', 'sex'] = 0
data.loc[data['c_charge_degree'] == 'M', 'c_charge_degree'] = 1
data.loc[data['c_charge_degree'] == 'F', 'c_charge_degree'] = 0
#data.to_csv('./data/compas_preproc.csv',index=False,header=True)
del nan_var, useless_var

# Reduced-feature alternative kept from the original notebook:
#data = data[['age','race','sex','decile_score','priors_count','los','c_charge_degree','two_year_recid']]
data = data.dropna()  # original author's note: 6150*23 -> 5730*16
print(data.head(5))

# Continuous features are standardised; 0/1 features are passed through as-is.
# Reduced-feature alternative: cont = ['age', 'decile_score', 'priors_count', 'los'],
#                              cate = ['sex', 'race', 'c_charge_degree']
cont_cols = ['age', 'juv_fel_count', 'decile_score', 'juv_misd_count', 'juv_other_count',
             'priors_count', 'c_days_from_compas', 'los', 'custody', 'lasts']
cate_cols = ['sex', 'race', 'c_charge_degree', 'is_violent_recid', 'event']

X = data.drop(columns='two_year_recid')
# Reset the index so the categorical frame aligns row-by-row with the freshly
# built (0..n-1 indexed) scaled frame when the two are concatenated.
X.index = range(data.shape[0])
X_cate = X[cate_cols]
X_cont = pd.DataFrame(StandardScaler().fit_transform(X[cont_cols]), columns=cont_cols)

# Final column order: categorical first (so race is column 1), then continuous.
X = pd.concat([X_cate, X_cont], axis=1)
y = data.two_year_recid
features = list(X.columns)

X = np.asarray(X).astype('float32')
y = np.asarray(y).astype('float32')

del X_cate, X_cont

  sex  age race  juv_fel_count  decile_score  juv_misd_count  juv_other_count  \
1   1   34    0              0             3               0                0   
2   1   24    0              0             4               0                1   
6   1   41    1              0             6               0                0   
8   0   39    1              0             1               0                0   
9   1   21    1              0             3               0                0   

   priors_count  c_days_from_compas c_charge_degree  is_violent_recid  event  \
1             0            0.693147               0                 1      1   
2             4            0.693147               0                 0      0   
6            14            0.693147               0                 0      1   
8             0            0.693147               1                 0      0   
9             1            5.733341               0                 1      1   

   two_year_recid       los   cu

# Data Splitting

In [None]:
# Hold out 800 rows for testing and another 800 for validation
# (roughly 5:1:1 train:val:test).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=800, random_state=3)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=800, random_state=3)

# Row positions of each race group; column 1 of the feature matrix is race
# (0 = African-American, 1 = Caucasian).
train_size = len(y_train)
train_idx_AA = np.flatnonzero(X_train[:, 1] == 0.0)
train_idx_C = np.flatnonzero(X_train[:, 1] == 1.0)
test_size = len(y_test)
test_idx_AA = np.flatnonzero(X_test[:, 1] == 0.0)
test_idx_C = np.flatnonzero(X_test[:, 1] == 1.0)

# Summary of the split sizes and the AA:C ratio in train and test.
rule = "#" * 80
train_AA_shape, train_C_shape = X_train[train_idx_AA].shape, X_train[train_idx_C].shape
test_AA_shape, test_C_shape = X_test[test_idx_AA].shape, X_test[test_idx_C].shape

print('\n', rule, '\n', ' ' * 20, " Split up Train-Validation-Test sets ", '\n', rule, '\n')
print(" X_train size: ", X_train.shape, ",      y_train size: ", y_train.shape, '\n',
      "X_validation size: ", X_val.shape, ",  y_validation size: ", y_val.shape, '\n',
      "X_test size: ", X_test.shape, ',        y_test size: ', y_test.shape)
print(" X_train_AA size: ", train_AA_shape, ",   X_train_C size: ", train_C_shape, '\n',
      "X_test_AA size: ", test_AA_shape, ',     X_test_C size: ', test_C_shape, '\n',
      "Ratio:", train_AA_shape[0] / train_C_shape[0], test_AA_shape[0] / test_C_shape[0])
print('\n', rule)


 ################################################################################ 
                       Split up Train-Validation-Test sets  
 ################################################################################ 

 X_train size:  (4130, 15) ,      y_train size:  (4130,) 
 X_validation size:  (800, 15) ,  y_validation size:  (800,) 
 X_test size:  (800, 15) ,        y_test size:  (800,)
 X_train_AA size:  (2472, 15) ,   X_train_C size:  (1658, 15) 
 X_test_AA size:  (478, 15) ,     X_test_C size:  (322, 15) 
 Ratio: 1.490952955367913 1.484472049689441

 ################################################################################


# Baseline Model And Evaluation

In [None]:
#If use Neural Network as base model
def base_nn_model(X_in,y_in,X_val,y_val):
    """Train and return the baseline classifier: one dense softmax layer.

    Parameters
    ----------
    X_in, y_in : training features and 0/1 labels.
    X_val, y_val : validation features and 0/1 labels, reported after each epoch.

    Returns
    -------
    The fitted Keras ``Model``; it outputs two class probabilities per row.
    """
    feature = Input(shape=(X_in.shape[1],))
    y = Dense(2, activation="softmax")(feature)
    model = Model(feature, y)

    adam = tf.keras.optimizers.Adam(0.001)
    # The output layer already applies softmax, so the loss receives
    # probabilities. from_logits=True would squash them through a sigmoid a
    # second time and distort the loss and its gradients.
    loss = keras.losses.BinaryCrossentropy(from_logits=False)
    metric = [tf.keras.metrics.BinaryAccuracy()]
    model.compile(optimizer=adam, loss=loss, metrics=metric)
    # Labels are one-hot encoded to match the two-unit output layer.
    model.fit(X_in, tf.one_hot(y_in, 2), epochs=10, batch_size=10,
              validation_data=(X_val, tf.one_hot(y_val, 2)))
    return model

def evaluation(model,X,y):
    """Print accuracy, error rates and per-race fairness gaps for ``model``.

    The race groups are taken from the module-level ``test_idx_AA`` and
    ``test_idx_C`` index arrays, so ``X``/``y`` must be the test split those
    indices were built from. Nothing is returned; all results are printed.
    """
    labels = np.argmax(np.round(model.predict(X)), axis=1)

    def _miss_rate(pred, truth, actual, wrong):
        # Share of rows whose true class is `actual` but were predicted `wrong`.
        subset = truth == actual
        return sum(pred[subset] == wrong) / len(truth[subset])

    pred_AA, truth_AA = labels[test_idx_AA], y[test_idx_AA]
    pred_C, truth_C = labels[test_idx_C], y[test_idx_C]

    # model.evaluate returns [loss, binary accuracy]; only the accuracy is kept.
    acc = model.evaluate(X, tf.one_hot(y,2))[1]

    FPR_all = _miss_rate(labels, y, 0, 1)
    FNR_all = _miss_rate(labels, y, 1, 0)
    FPR_AA = _miss_rate(pred_AA, truth_AA, 0, 1)
    FNR_AA = _miss_rate(pred_AA, truth_AA, 1, 0)
    FPR_C = _miss_rate(pred_C, truth_C, 0, 1)
    FNR_C = _miss_rate(pred_C, truth_C, 1, 0)

    pred_p_AA = np.mean(pred_AA == 1)
    pred_p_C = np.mean(pred_C == 1)
    acc_AA = np.mean(pred_AA == truth_AA)
    acc_C = np.mean(pred_C == truth_C)

    print('\n',"#"*80)
    print('The accuracy of baseline model NN is: %3f.'%(acc))
    print('The False Positive Rate for overall population is: %3f.'%FPR_all)
    print('The False Negative Rate for overall population is: %3f.'%FNR_all)
    print("Specifically:")
    print('Parity Check: The rate of positive estimate for African American and Caucasian are %3f and %3f, and D_par=%3f.'%(pred_p_AA,pred_p_C,pred_p_AA-pred_p_C))
    print('Calibration Check: The rate of correct estimate for African American and Caucasian are %3f and %3f, and D_cal=%3f.'%(acc_AA,acc_C,acc_AA-acc_C))
    print('The False Positive Rate for African American and Caucasian are %3f and %3f, and D_FPR=%3f.'%(FPR_AA,FPR_C,FPR_AA-FPR_C))
    print('The False Negative Rate for African American and Caucasian are %3f and %3f, and D_FNR=%3f.'%(FNR_AA,FNR_C,FNR_AA-FNR_C))
    print('\n',"#"*80)

In [None]:
# Baseline 1: 'race' kept among the features.
NN1 = base_nn_model(X_train,y_train,X_val,y_val)
evaluation(NN1,X_test,y_test)
# Baseline 2: 'race' dropped. Race is column 1 of the feature matrix (column 0
# is sex), and the same column has to be removed from train, validation and
# test so the model is fitted and evaluated on identical features.
NN2 = base_nn_model(np.delete(X_train,1,1),y_train,np.delete(X_val,1,1),y_val)
evaluation(NN2,np.delete(X_test,1,1),y_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

 ################################################################################
The accuracy of baseline model NN is: 0.927500.
The False Positive Rate for overall population is: 0.079137.
The False Negative Rate for overall population is: 0.065274.
Specifically:
Parity Check: The rate of positive estimate for African American and Caucasian are 0.562762 and 0.378882, and D_par=0.183880.
Calibration Check: The rate of correct estimate for African American and Caucasian are 0.928870 and 0.925466, and D_cal=0.003404.
The False Positive Rate for African American and Caucasian are 0.104072 and 0.051020, and D_FPR=0.053052.
The False Negative Rate for African American and Caucasian are 0.042802 and 0.111111, and D_FNR=-0.068310.

 ################################################################################
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoc

Then we add a custom constraint and loss to the base model:

In [None]:
def DFR(model,X,y,type):
    '''
    Difference in false-negative / false-positive rate between the two race
    groups (African-American minus Caucasian), using column 1 of X as race.

    type: str in ['dfnr','dfpr','both']
          'dfnr' -> FNR gap, 'dfpr' -> FPR gap, 'both' -> (FNR gap, FPR gap).
          Any other value makes the function return None.
    '''
    if type not in ('dfnr', 'dfpr', 'both'):
        return None

    race = X[:, 1]
    positions = np.array(range(len(y)))
    labels = np.argmax(np.round(model.predict(X)), axis=1)

    def _group_rates(rows):
        # (FPR, FNR) restricted to the given row positions.
        pred, truth = labels[rows], y[rows]
        negatives, positives = truth == 0, truth == 1
        fpr = sum(pred[negatives] == 1) / len(truth[negatives])
        fnr = sum(pred[positives] == 0) / len(truth[positives])
        return fpr, fnr

    FPR_AA, FNR_AA = _group_rates(positions[race == 0.0])
    FPR_C, FNR_C = _group_rates(positions[race == 1.0])
    dfnr = FNR_AA - FNR_C
    dfpr = FPR_AA - FPR_C

    gaps = {'dfnr': dfnr, 'dfpr': dfpr, 'both': (dfnr, dfpr)}
    return gaps[type]
# Signed FNR and FPR gaps (African-American minus Caucasian) of the race-aware
# baseline NN1 on the test split.
dfnr,dfpr = DFR(NN1,X_test,y_test,'both')
print(dfnr,dfpr)

-0.06830955469087764 0.05305199002677995


In [None]:
def new_training_groups(model, X_train, y_train):
    '''
    Split the training set into a "penalized" and a "clean" part.

    X_train   n*d feature matrix, race in column 1 (0 = AA, 1 = C)
    y_train   n, 0/1 labels
    model     object whose predict(X) returns n*2 class scores

    The group treated as discriminated is African-American when the FPR gap
    (AA minus C) is positive, otherwise Caucasian. Rows of that group which the
    model misclassifies are "penalized"; its correctly classified rows plus the
    whole other group are "clean".

    Returns X_clean, y_clean, X_penalized, y_penalized, dn, dp, where dn / dp
    are the FNR / FPR gaps reported by DFR on the full training set.
    '''
    is_AA = np.array(X_train[:,1] == 0.0)
    is_C = np.array(X_train[:,1] == 1.0)

    dn, dp = DFR(model, X_train, y_train, type="both")
    if dp > 0:
        target_rows, other_rows = is_AA, is_C
    else:
        target_rows, other_rows = is_C, is_AA

    X_target, y_target = X_train[target_rows], y_train[target_rows]
    X_other, y_other = X_train[other_rows], y_train[other_rows]

    # A non-zero difference between label and predicted class marks a mistake.
    predicted = np.argmax(model.predict(X_target), axis=1)
    wrong = (y_target - predicted) != 0.0

    X_train_penalized, y_train_penalized = X_target[wrong], y_target[wrong]

    # Clean set: correctly classified target-group rows first, then the other group.
    X_train_clean = np.concatenate((X_target[~wrong], X_other), axis=0)
    y_train_clean = np.concatenate((y_target[~wrong], y_other), axis=0)

    return X_train_clean , y_train_clean , X_train_penalized , y_train_penalized, dn, dp


Here we use a neural network as the base model. The exact numbers are unlikely to be reproduced on a re-run, but similar results should come out.

**Note:** When only a few features are used in the model, the model and its predictions are unstable.

In [None]:
#########################[DON'T RUN THIS CELL IF UNNECESSARY]#########################

## initialization
# Seed TensorFlow as well as NumPy: the network is built and trained by
# TensorFlow/Keras, so seeding NumPy alone does not make the run repeatable.
np.random.seed(7777)
tf.random.set_seed(7777)
model = base_nn_model(X_train, y_train, X_val, y_val)

# Initial weight C of the penalized loss, its per-round increment delta and the
# maximum number of rounds (named max_iter so the builtin iter() stays usable).
C = 1
delta = 0.2
max_iter = 20

# new training groups
X_ts, y_ts, X_tp, y_tp, dn, dp = new_training_groups(model, X_train, y_train)
feature = Input(shape=(X_train.shape[1],))
# Bound to `output`, not `y`, so the module-level label array `y` is not overwritten.
output = Dense(2, activation="softmax")(feature)
mod_loop = Model(feature, output)
adam = tf.keras.optimizers.Adam(0.001)
# The model emits softmax probabilities, hence from_logits=False.
loss = keras.losses.BinaryCrossentropy(from_logits=False)
metric = [tf.keras.metrics.BinaryAccuracy()]


# Both losses ignore the batch Keras passes in and recompute the loss on the
# module-level penalized (X_tp, y_tp) / clean (X_ts, y_ts) subsets.
# NOTE(review): those globals are rebound every round; confirm that Keras
# re-reads them after the first training step rather than using captured values.
def penal_loss(y_true, y_pred):
    return loss(tf.one_hot(y_tp, 2), mod_loop(X_tp))


def clean_loss(y_true, y_pred):
    return loss(tf.one_hot(y_ts, 2), mod_loop(X_ts))


count = 0
# start while loop
# Keep going while the FPR gap exceeds 0.05 (or examine dn instead). The
# count%2==1 clause forces an even number of rounds; per the original author it
# guards against stopping on a model whose accuracy is below 0.5, where one of
# the overall FPR/FNR values is close to 1.
while (count == 0 or count % 2 == 1 or abs(dp) > 0.05) and count < max_iter:
    C = C + delta
    #print('Count:%d'%count)
    mod_loop.compile(optimizer=adam, loss=[penal_loss, clean_loss], loss_weights=[C, 1], metrics=metric)
    mod_loop.fit(X_train, tf.one_hot(y_train, 2), epochs=10, validation_data=(X_val, tf.one_hot(y_val, 2)))
    X_ts, y_ts, X_tp, y_tp, dn, dp = new_training_groups(mod_loop, X_train, y_train)
    count += 1

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
#Evaluation
# Print the (FNR gap, FPR gap) tuple and the full metric report of the
# re-weighted model on the test split.
print(DFR(mod_loop,X_test,y_test,type='both'))
evaluation(mod_loop,X_test,y_test)

(0.04811314928046445, -0.030104349432080513)

 ################################################################################
The accuracy of baseline model NN is: 0.772500.
The False Positive Rate for overall population is: 0.290168.
The False Negative Rate for overall population is: 0.159269.
Specifically:
Parity Check: The rate of positive estimate for African American and Caucasian are 0.571130 and 0.527950, and D_par=0.043179.
Calibration Check: The rate of correct estimate for African American and Caucasian are 0.778243 and 0.763975, and D_cal=0.014268.
The False Positive Rate for African American and Caucasian are 0.276018 and 0.306122, and D_FPR=-0.030104.
The False Negative Rate for African American and Caucasian are 0.175097 and 0.126984, and D_FNR=0.048113.

 ################################################################################


In [None]:
####################################################################################################################################
####################### IF NEED TO MODIFY CODES, USE THIS CHUNK TO RUN ABOVE OR RUN BELOW ##########################################
####################################################################################################################################

## Fairness Beyond Disparate Treatment & Disparate Impact: Learning Classification without Disparate Mistreatment ($DM_{sen}$ & $DM$)

We first implement $DM_{sen}$, since it requires no change to the dataset:

In [None]:
# 4:1 train:test split for the DM experiments.
# The labels are rebuilt from `data` before splitting.
y = np.asarray(data.two_year_recid).astype('float32')

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)

# Row positions of each race group (race is column 1 of X_train / X_test).
train_size = len(y_train)
train_idx_AA = np.flatnonzero(X_train[:, 1] == 0.0)
train_idx_C = np.flatnonzero(X_train[:, 1] == 1.0)
test_size = len(y_test)
test_idx_AA = np.flatnonzero(X_test[:, 1] == 0.0)
test_idx_C = np.flatnonzero(X_test[:, 1] == 1.0)

# Prepend a column of ones (intercept term); every feature shifts right by one,
# so in the *1 matrices sex is column 1 and race is column 2.
X_train1 = np.hstack((np.ones((X_train.shape[0], 1)), X_train))
X_test1 = np.hstack((np.ones((X_test.shape[0], 1)), X_test))

# Per-group views of the augmented matrices and of the labels.
X_train1_AA, X_train1_C = X_train1[train_idx_AA], X_train1[train_idx_C]
y_train_AA, y_train_C = y_train[train_idx_AA], y_train[train_idx_C]
X_test1_AA, X_test1_C = X_test1[test_idx_AA], X_test1[test_idx_C]
y_test_AA, y_test_C = y_test[test_idx_AA], y_test[test_idx_C]

rule = "#" * 80
print('\n', rule, '\n', ' ' * 20, " Split up Train-Test sets ", '\n', rule, '\n')
print(" X_train size: ", X_train1.shape, ",      y_train size: ", y_train.shape, '\n',
      "X_test size: ", X_test1.shape, ',       y_test size: ', y_test.shape)
print(" X_train_AA size: ", X_train1_AA.shape, ",   X_train_C size: ", X_train1_C.shape, '\n',
      "X_test_AA size: ", X_test1_AA.shape, ',     X_test_C size: ', X_test1_C.shape, '\n',
      "Ratio:", X_train1_AA.shape[0] / X_train1_C.shape[0], X_test1_AA.shape[0] / X_test1_C.shape[0])
print('\n', rule)


 ################################################################################ 
                       Split up Train-Test sets  
 ################################################################################ 

 X_train size:  (4584, 16) ,      y_train size:  (4584,) 
 X_test size:  (1146, 16) ,       y_test size:  (1146,)
 X_train_AA size:  (2745, 16) ,   X_train_C size:  (1839, 16) 
 X_test_AA size:  (688, 16) ,     X_test_C size:  (458, 16) 
 Ratio: 1.4926590538336053 1.502183406113537

 ################################################################################


### Custom Loss: Add Constraints

Following the paper, the loss function of logistic regression is modified:

In [None]:
pip install dccp



In [None]:
import dccp
import cvxpy as cvx
from cvxpy import *

In [None]:
def lossfunc(X,theta,y_true):
    """Return the summed logistic loss of a linear classifier as a cvxpy expression.

    X       design matrix (constant array)
    theta   cvxpy Variable holding the weight vector
    y_true  labels in {0, 1}; mapped to {-1, +1} internally

    `sum`, `logistic` and `multiply` are the cvxpy atoms brought in by
    `from cvxpy import *`.
    """
    y_signed = 2*y_true - 1  # {0,1} -> {-1,1}
    # `@` is matrix multiplication; using `*` for a matrix product is deprecated
    # in cvxpy, where `*` is reserved for elementwise multiplication.
    margins = X @ theta
    log_loss = sum(logistic(multiply(-y_signed, margins)))
    return log_loss


# Unconstrained logistic regression solved through the DCCP interface; serves
# as the reference fit before any fairness constraint is added.
np.random.seed(5243)
# One coefficient per column of X_train1, started from uniform random values.
theta = cvx.Variable(X_train1.shape[1])
theta.value = np.random.rand(theta.shape[0])

# tau / mu are passed to the DCCP solver below; EPS is not referenced anywhere
# in the visible code.
tau, mu, EPS = 0.005, 1.5, 1 
Prob1 = cvx.Problem(Minimize(lossfunc(X_train1,theta,y_train)),[]) # No constraints


# Check that the problem is accepted by DCCP and show the starting point.
print(dccp.is_dccp(Prob1))
print(theta.value)
#[0.5591043  0.9994264  0.57031546 0.43833912 0.08453454 0.05043884
# 0.91119515 0.16423428 0.3034639  0.41950956 0.85237613 0.4244003
# 0.96147514 0.26277008 0.02849745 0.61075812]
# solve() updates theta.value in place; assigning the return value to `result`
# only keeps the notebook from echoing it.
result = Prob1.solve(method='dccp', tau=tau, mu=mu, tau_max=1e10, verbose=True) #Here changes the theta.value, result avoids the output
print(theta.value)
#[-2.22198860e+00  1.75116119e-01  1.31373607e-01 -2.60536345e-01 1.53575511e+00  4.91247336e+00 
# -2.60491152e-01  4.36237090e-03  4.13966295e-01  7.39736815e-02  1.01397507e-01  2.79119037e-01
# -6.37340155e-02  3.96745359e-02  6.88413345e-02 -2.27307636e+00]

True
[0.5591043  0.9994264  0.57031546 0.43833912 0.08453454 0.05043884
 0.91119515 0.16423428 0.3034639  0.41950956 0.85237613 0.4244003
 0.96147514 0.26277008 0.02849745 0.61075812]
[-2.02760707  0.12980288  0.12303316 -0.27410271  1.71950929  4.75786739
 -0.27591511  0.02399848  0.45356176  0.05861524  0.04602311  0.27193313
 -0.04650322  0.15181987 -0.45112505 -2.24001541]


In [None]:
def predict(X,theta):
    """Classify rows of X with the linear rule sign(X . theta).

    The sign in {-1, 1} is mapped to a label in {0, 1}; a row whose score is
    exactly zero comes out as 0.5.
    """
    score = np.dot(X, theta)
    return 0.5 * (np.sign(score) + 1)

# Coefficients found by the unconstrained solve and their test-set predictions.
theta_star = theta.value
y_pred = predict(X_test1, theta_star)

In [None]:
def evaluation_DM(X,y,y_pred,idx_AA=None,idx_C=None):
    """Print accuracy, error rates and per-race fairness gaps for given predictions.

    Parameters
    ----------
    X : feature matrix the predictions were made on; only its row count is used.
    y : true 0/1 labels.
    y_pred : predicted 0/1 labels.
    idx_AA, idx_C : row positions of the African-American / Caucasian rows.
        When omitted they default to the module-level ``test_idx_AA`` and
        ``test_idx_C``, i.e. ``X``/``y`` are assumed to be the current test split.

    The groups are deliberately not read from a column of ``X``: in the
    intercept-augmented matrices column 1 is sex (race is column 2), and the
    race-removed matrices do not contain race at all, so ``X[:, 1]`` would
    compare sexes while labelling them as races.

    Raises
    ------
    ValueError
        If ``X``, ``y`` and ``y_pred`` do not have the same number of rows.
    """
    y = np.asarray(y)
    y_pred = np.asarray(y_pred)
    if not (X.shape[0] == len(y) == len(y_pred)):
        raise ValueError("X, y and y_pred must have the same number of rows")
    if idx_AA is None:
        idx_AA = test_idx_AA
    if idx_C is None:
        idx_C = test_idx_C

    def _miss_rate(pred, truth, actual, wrong):
        # Share of rows whose true class is `actual` but were predicted `wrong`.
        subset = truth == actual
        return np.sum(pred[subset] == wrong) / len(truth[subset])

    y_pred_AA, y_test_AA = y_pred[idx_AA], y[idx_AA]
    y_pred_C, y_test_C = y_pred[idx_C], y[idx_C]
    FPR_all = _miss_rate(y_pred, y, 0, 1)
    FNR_all = _miss_rate(y_pred, y, 1, 0)
    FPR_AA = _miss_rate(y_pred_AA, y_test_AA, 0, 1)
    FNR_AA = _miss_rate(y_pred_AA, y_test_AA, 1, 0)
    FPR_C = _miss_rate(y_pred_C, y_test_C, 0, 1)
    FNR_C = _miss_rate(y_pred_C, y_test_C, 1, 0)
    pred_p_AA, pred_p_C = np.mean(y_pred_AA==1), np.mean(y_pred_C==1)
    acc = np.sum(y_pred == y)/len(y)
    acc_AA, acc_C = np.mean(y_pred_AA == y_test_AA), np.mean(y_pred_C == y_test_C)
    print('\n',"#"*80)
    print('The accuracy of baseline model LR is: %3f.'%(acc))
    print('The False Positive Rate for overall population is: %3f.'%FPR_all)
    print('The False Negative Rate for overall population is: %3f.'%FNR_all)
    print("Specifically:")
    print('Parity Check: The rate of positive estimate for African American and Caucasian are %3f and %3f, and D_par=%3f.'%(pred_p_AA,pred_p_C,pred_p_AA-pred_p_C))
    print('Calibration Check: The rate of correct estimate for African American and Caucasian are %3f and %3f, and D_cal=%3f.'%(acc_AA,acc_C,acc_AA-acc_C))
    print('The False Positive Rate for African American and Caucasian are %3f and %3f, and D_FPR=%3f.'%(FPR_AA,FPR_C,FPR_AA-FPR_C))
    print('The False Negative Rate for African American and Caucasian are %3f and %3f, and D_FNR=%3f.'%(FNR_AA,FNR_C,FNR_AA-FNR_C))
    print('\n',"#"*80)
#print(y_pred.shape)
# Report the metrics of the unconstrained logistic regression on the test split.
evaluation_DM(X_test1,y_test,y_pred)


 ################################################################################
The accuracy of baseline model LR is: 0.924084.
The False Positive Rate for overall population is: 0.084956.
The False Negative Rate for overall population is: 0.067126.
Specifically:
Parity Check: The rate of positive estimate for African American and Caucasian are 0.423729 and 0.538462, and D_par=-0.114733.
Calibration Check: The rate of correct estimate for African American and Caucasian are 0.932203 and 0.921978, and D_cal=0.010225.
The False Positive Rate for African American and Caucasian are 0.077465 and 0.087470, and D_FPR=-0.010006.
The False Negative Rate for African American and Caucasian are 0.053191 and 0.069815, and D_FNR=-0.016624.

 ################################################################################


If we do not put constraints on the loss function, the accuracy of the logistic regression model is around $93\%$, while the FPR and FNR are between roughly 0.05 and 0.1.

Then, we put the constraint in the following model:

In [None]:
# Constraints on the loss function ($DM_{sen}$: race stays among the features)
np.random.seed(5243)
theta1 = cvx.Variable(X_train1.shape[1])
# NOTE(review): the size is read from `theta` (the variable of the unconstrained
# problem) rather than `theta1`; both are built from X_train1.shape[1].
theta1.value = np.random.rand(theta.shape[0])

# tau / mu are passed to the DCCP solver below; EPS is not referenced anywhere
# in the visible code.
tau, mu, EPS = 0.5, 1.6, 1e-4 

def g_theta(y,X,theta):
    """Return min(0, y * (X theta)) per row as a cvxpy expression.

    y is mapped from {0, 1} to {-1, +1} first, so an entry is negative exactly
    when the row lies on the wrong side of the decision boundary.
    """
    y = 2*y - 1
    d = matmul(X,theta)
    y_d = multiply(y,d)
    # NOTE(review): np.zeros_like is applied to a cvxpy expression here;
    # presumably it yields a zero array of matching shape - confirm.
    return minimum(np.zeros_like(y_d),y_d)

# c bounds the absolute difference between the two weighted group sums.
c = 0.05
# Group sizes: N0 = African-American rows, N1 = Caucasian rows, N = all rows.
N0 = X_train1_AA.shape[0]
N1 = X_train1_C.shape[0]
N = X_train1.shape[0]
print(N,N0,N1)


# The two inequalities together enforce
#   | N0/N * sum(g over C rows) - N1/N * sum(g over AA rows) | <= c
Prob2 = cvx.Problem(Minimize(lossfunc(X_train1,theta1,y_train)),
                 [N0/N*sum(g_theta(y_train_C,X_train1_C,theta1)) <= c + N1/N*sum(g_theta(y_train_AA, X_train1_AA,theta1)), 
                  N0/N*sum(g_theta(y_train_C,X_train1_C,theta1)) >= N1/N*sum(g_theta(y_train_AA, X_train1_AA,theta1)) - c]) # With constraints
print(dccp.is_dccp(Prob2))
# solve() updates theta1.value in place.
result1 = Prob2.solve(method='dccp', tau=tau, mu=mu, tau_max=1e10, verbose=True)
#g_theta(y_train,X_train1, theta.value).value.shape
#constraint()
#X_train.
#pd.DataFrame(X_train)
#pd.DataFrame(x_train1)

4584 2745 1839
True


In [None]:
# Test-set predictions and metrics of the constrained fit that keeps race as a feature.
y_pred1 = predict(X_test1, theta1.value)
evaluation_DM(X_test1,y_test,y_pred1)


 ################################################################################
The accuracy of baseline model LR is: 0.924084.
The False Positive Rate for overall population is: 0.088496.
The False Negative Rate for overall population is: 0.063683.
Specifically:
Parity Check: The rate of positive estimate for African American and Caucasian are 0.427966 and 0.541758, and D_par=-0.113792.
Calibration Check: The rate of correct estimate for African American and Caucasian are 0.927966 and 0.923077, and D_cal=0.004889.
The False Positive Rate for African American and Caucasian are 0.084507 and 0.089835, and D_FPR=-0.005327.
The False Negative Rate for African American and Caucasian are 0.053191 and 0.065708, and D_FNR=-0.012517.

 ################################################################################


From the above results, we can see that the $DM_{sen}$ algorithm slightly reduces $D_{FNR}$ to around -0.015, which is very close to $D_{FPR}$. 

Then we implement $DM$:

In [None]:
# In the intercept-augmented matrices race sits in column 2; the *_sen matrices
# are the same data with that column removed.
race_col_with_intercept = 2
X_train1_sen = np.delete(X_train1, race_col_with_intercept, axis=1)
X_test1_sen = np.delete(X_test1, race_col_with_intercept, axis=1)

# Group membership still comes from the index arrays built before the removal.
X_train1_AA_sen, X_train1_C_sen = X_train1_sen[train_idx_AA], X_train1_sen[train_idx_C]
X_test1_AA_sen, X_test1_C_sen = X_test1_sen[test_idx_AA], X_test1_sen[test_idx_C]

rule = "#" * 80
print('\n', rule, '\n', ' ' * 20, " Split up Train-Test sets ", '\n', rule, '\n')
print(" X_train1_sen size: ", X_train1_sen.shape, ",      y_train size: ", y_train.shape, '\n',
      "X_test1_sen size: ", X_test1_sen.shape, ',       y_test size: ', y_test.shape)
print(" X_train1_AA_sen size: ", X_train1_AA_sen.shape, ",   X_train1_C_sen size: ", X_train1_C_sen.shape, '\n',
      "X_test1_AA_sen size: ", X_test1_AA_sen.shape, ',     X_test1_C_sen size: ', X_test1_C_sen.shape, '\n',
      "Ratio:", X_train1_AA_sen.shape[0] / X_train1_C_sen.shape[0], X_test1_AA_sen.shape[0] / X_test1_C_sen.shape[0])
print('\n', rule)


 ################################################################################ 
                       Split up Train-Test sets  
 ################################################################################ 

 X_train1_sen size:  (4584, 15) ,      y_train size:  (4584,) 
 X_test1_sen size:  (1146, 15) ,       y_test size:  (1146,)
 X_train1_AA_sen size:  (2745, 15) ,   X_train1_C_sen size:  (1839, 15) 
 X_test1_AA_sen size:  (688, 15) ,     X_test1_C_sen size:  (458, 15) 
 Ratio: 1.4926590538336053 1.502183406113537

 ################################################################################


In [None]:
# Constraints on the loss function ($DM$: race column removed from the features)
np.random.seed(5243)
theta2 = cvx.Variable(X_train1_sen.shape[1])
theta2.value = np.random.rand(theta2.shape[0])

# tau / mu are passed to the DCCP solver below; EPS is not referenced anywhere
# in the visible code.
tau, mu, EPS = 0.5, 1.6, 1e-4 
# c bounds the absolute difference between the two weighted group sums.
c = 0.05
# Group sizes: N0 = African-American rows, N1 = Caucasian rows, N = all rows.
N0 = X_train1_AA_sen.shape[0]
N1 = X_train1_C_sen.shape[0]
N = X_train1_sen.shape[0]
print(N,N0,N1)


# Same pair of inequalities as for the race-aware problem; note that this
# rebinds the names Prob2 and result1.
Prob2 = cvx.Problem(Minimize(lossfunc(X_train1_sen,theta2,y_train)),
                 [N0/N*sum(g_theta(y_train_C,X_train1_C_sen,theta2)) <= c + N1/N*sum(g_theta(y_train_AA,X_train1_AA_sen,theta2)), 
                  N0/N*sum(g_theta(y_train_C,X_train1_C_sen,theta2)) >= N1/N*sum(g_theta(y_train_AA,X_train1_AA_sen,theta2)) - c]) # With constraints
print(dccp.is_dccp(Prob2))
# solve() updates theta2.value in place.
result1 = Prob2.solve(method='dccp', tau=tau, mu=mu, tau_max=1e10, verbose=True)

4584 2745 1839
True


In [None]:
# Test-set predictions and metrics of the constrained fit trained without the race column.
y_pred2 = predict(X_test1_sen, theta2.value)
evaluation_DM(X_test1_sen,y_test,y_pred2)


 ################################################################################
The accuracy of baseline model LR is: 0.925829.
The False Positive Rate for overall population is: 0.084956.
The False Negative Rate for overall population is: 0.063683.
Specifically:
Parity Check: The rate of positive estimate for African American and Caucasian are 0.423729 and 0.540659, and D_par=-0.116931.
Calibration Check: The rate of correct estimate for African American and Caucasian are 0.932203 and 0.924176, and D_cal=0.008028.
The False Positive Rate for African American and Caucasian are 0.077465 and 0.087470, and D_FPR=-0.010006.
The False Negative Rate for African American and Caucasian are 0.053191 and 0.065708, and D_FNR=-0.012517.

 ################################################################################


In [None]:
# Elapsed wall-clock time since `start` was recorded in the data-loading cell.
end = time.time()
print('The running time of overall algorithm is: %3fs.'%(end-start))

The running time of overall algorithm is: 642.106358s.


We can see that the $DM$ algorithm reduces $D_{FNR}$. However, from the above results it is hard to tell which of the two variants is better, or how $DM_{sen}$ violates disparate treatment. Overall, this algorithm has an impact on controlling the difference in FPR and FNR, but the effect deserves further study: when only a few features are in the model, both algorithms seem to have no effect on controlling our target.