In [2]:
import numpy as np
from random import seed, shuffle
from scipy.optimize import minimize 
from multiprocessing import Pool, Process, Queue
from collections import defaultdict
from copy import deepcopy
import matplotlib.pyplot as plt 
from sklearn.metrics import accuracy_score as accuracy_score

In [3]:
#defining loss function of logistic regression

def log_logistic(X):
    """Compute ``log(1 / (1 + exp(-X)))`` element-wise without overflow.

    Parameters
    ----------
    X : array_like
        Real-valued input of any shape (a scalar is accepted).

    Returns
    -------
    numpy.ndarray
        ``-log(1 + exp(-X))`` with the same shape as ``X``. Integer and
        boolean inputs are promoted to ``float64`` first, so the fractional
        results are never truncated into an integer buffer.
    """
    z = np.asarray(X)
    if z.dtype.kind in 'iub':
        # An integer-typed output buffer would silently truncate the result.
        z = z.astype(np.float64)
    # log(sigmoid(z)) == -log(exp(0) + exp(-z)); logaddexp evaluates the
    # right-hand side stably for large positive and large negative z.
    return np.asarray(-np.logaddexp(0.0, -z))

def loss_function(w,X,y,return_arr = None):
    """Logistic loss of the linear scores ``np.dot(X, w)`` against ``y``.

    Each sample's loss is ``-log_logistic(y * np.dot(X, w))``.

    Parameters
    ----------
    w : weight vector.
    X : feature matrix (or a single feature row).
    y : labels, multiplied element-wise with the scores.
    return_arr : when it compares equal to ``True`` the per-sample losses
        are returned; otherwise their sum is returned.
    """
    margins = y * np.dot(X, w)
    log_likelihood = log_logistic(margins)
    if return_arr == True:
        # caller asked for the un-aggregated, per-sample values
        return -log_likelihood
    return -np.sum(log_likelihood)

In [4]:
def train_model(x,y,x_sensitive,loss_function,sep_cons,gamma,max_iter=100000):
    """Fit linear weights whose decision scores have minimal covariance with
    the sensitive attribute, subject to a bound on the loss increase.

    Two SLSQP stages are run:

    1. ``loss_function`` is minimised without constraints, starting from a
       point drawn with ``np.random.rand`` (seed NumPy's global RNG
       beforehand for reproducible results).
    2. Starting from that solution,
       ``|sum((x_sensitive - mean(x_sensitive)) * np.dot(w, x.T))| / n_samples``
       is minimised under inequality constraints built from the stage-1
       losses. SLSQP treats an ``'ineq'`` constraint as ``fun(w) >= 0``.

    Parameters
    ----------
    x : array_like, shape (n_samples, n_features)
        Feature matrix.
    y : array_like, shape (n_samples,)
        Labels; passed unchanged to ``loss_function``.
    x_sensitive : array_like, shape (n_samples,)
        Sensitive attribute. In the per-sample mode, samples whose value
        equals 1.0 and whose stage-1 score is positive get the
        "stay non-negative" constraint.
    loss_function : callable
        Called as ``loss_function(w, x, y)`` for a total loss and as
        ``loss_function(w, x, y, return_arr=True)`` for per-sample losses.
    sep_cons : bool
        ``False``: a single constraint keeps the total loss within
        ``(1 + gamma)`` times the stage-1 total loss.
        Otherwise: one constraint per sample. A sample with positive
        stage-1 score and ``x_sensitive == 1.0`` must keep a non-negative
        score; every other sample must keep its own loss within
        ``(1 + gamma)`` times its stage-1 loss.
    gamma : float
        Allowed relative loss increase.
    max_iter : int, optional
        ``maxiter`` option forwarded to both SLSQP runs.

    Returns
    -------
    numpy.ndarray
        Weight vector returned by the second optimisation. The optimiser's
        success flag is not checked.
    """
    # Accept lists / other sequences as well as ndarrays.
    x = np.asarray(x)
    y = np.asarray(y)
    x_sensitive = np.asarray(x_sensitive)

    # Stage 1: unconstrained fit, giving the reference losses L(theta*).
    unconstrained = minimize(fun=loss_function,
                             x0=np.random.rand(x.shape[1],),
                             args=(x,y),
                             method='SLSQP',
                             options={'maxiter':max_iter},
                             constraints=[])
    old_w = unconstrained.x.copy()
    unconstrained_loss_arr = loss_function(old_w,x,y,return_arr=True)

    def constraint_gamma_all(w,x,y,initial_loss_arr):
        # >= 0 while the total loss stays within (1 + gamma) of the reference
        new_loss = loss_function(w,x,y)
        old_loss = np.sum(initial_loss_arr)
        return ((1.0+gamma)*old_loss)-new_loss

    def constraint_protected_people(w,x,y):
        # >= 0 while this sample's score stays on the non-negative side
        return np.dot(w,x.T)

    def constraint_unprotected_people(w,ind,old_loss,x,y):
        # >= 0 while this sample's loss stays within (1 + gamma) of its
        # reference loss; `ind` is accepted for signature parity only
        new_loss = loss_function(w,np.array(x),np.array(y))
        return ((1.0+gamma)*old_loss)-new_loss

    constraints = []
    predicted_labels = np.sign(np.dot(old_w,x.T))

    if sep_cons == False:
        # gamma-LR: one constraint on the aggregate loss
        constraints.append({'type':'ineq',
                            'fun':constraint_gamma_all,
                            'args':(x,y,unconstrained_loss_arr)})
    else:
        # fine-gamma LR: one constraint per sample
        for i in range(len(predicted_labels)):
            if predicted_labels[i]==1.0 and x_sensitive[i] == 1.0:
                con = {'type':'ineq',
                       'fun':constraint_protected_people,
                       'args':(x[i],y[i])}
            else:
                con = {'type':'ineq',
                       'fun':constraint_unprotected_people,
                       'args':(i,unconstrained_loss_arr[i],x[i],y[i])}
            constraints.append(con)

    def opt_function(w,x,x_sensitive):
        # absolute empirical covariance between the sensitive attribute and
        # the decision scores (equation 2)
        covariance = (x_sensitive - np.mean(x_sensitive))*np.dot(w,x.T)
        return float(abs(np.sum(covariance)))/float(x.shape[0])

    # Stage 2: minimise the covariance under the loss constraints.
    constrained = minimize(fun=opt_function,
                           x0=old_w,
                           args=(x,x_sensitive),
                           method='SLSQP',
                           options={'maxiter':max_iter},
                           constraints=constraints)

    return constrained.x

In [47]:
def predict(w,x):
    """Predict binary labels from linear scores.

    Parameters
    ----------
    w : weight vector.
    x : feature matrix with one sample per row (or a single feature row).

    Returns
    -------
    ``float64`` value(s): 1.0 where ``np.dot(w, x.T) >= 0`` and 0.0
    elsewhere.
    """
    z = np.dot(w,x.T)
    # sigmoid(z) >= 0.5 holds exactly when z >= 0. Thresholding the score
    # itself avoids the overflow of exp(-z) for very negative scores and the
    # rounding of tiny negative scores up to a probability of exactly 0.5.
    labels = (z >= 0)
    return labels.astype('float64')

In [82]:
#evaluation functions
def get_calibration(y_pred, y_true,x_sensitive):
    """Return the accuracy gap between the two sensitive groups.

    The result is the fraction of correct predictions among samples with
    ``x_sensitive == 1.0`` minus the fraction of correct predictions among
    all remaining samples. All three inputs are cast to ``float64`` before
    comparison.
    """
    predictions = y_pred.astype('float64')
    truths = y_true.astype('float64')
    in_group = x_sensitive.astype('float64') == 1.0

    def hit_rate(mask):
        # share of the samples selected by `mask` whose prediction matches
        return np.mean(predictions[mask] == truths[mask])

    return hit_rate(in_group) - hit_rate(~in_group)
def evaluation(y_pred,y_true,x_sensitive):
    """Print the overall accuracy and the between-group accuracy gap.

    Parameters
    ----------
    y_pred : predicted labels.
    y_true : ground-truth labels.
    x_sensitive : sensitive attribute; forwarded to ``get_calibration``.

    Returns
    -------
    None. Both scores are printed only.
    """
    cal = get_calibration(y_pred,y_true,x_sensitive)
    # scikit-learn's signature is accuracy_score(y_true, y_pred).
    acc = accuracy_score(y_true,y_pred)
    print("The accuracy score is: ",acc)
    print('The calibration score is:',cal)
    

In [83]:
# Toy data set: 4 samples x 5 features (the first column is a constant 1).
x =np.array([[1,2,3,5,2],[1,4,3,2,3],[1,6,3,4,2],[1,2,5,6,3]])
# NOTE(review): loss_function multiplies y by the score, so the rows
# labelled 0 contribute a constant to the loss; labels in {-1, +1} may have
# been intended -- confirm.
y = np.array([0,1,1,0])
x_sensitive = np.array([1,0,1,0])
# Per-sample (fine-gamma) constraints with gamma = 0.5. The starting point
# is random, so results can vary between runs unless NumPy's RNG is seeded.
w = train_model(x,y,x_sensitive,loss_function,sep_cons=True,gamma=0.5)
y_pred = predict(w,x)
# evaluation() prints both the accuracy and the calibration score, so no
# separate get_calibration call is needed.
evaluation(y_pred,y,x_sensitive)

The accuracy score is:  0.5
The calibration score is: 0.0
