# Logistic with constrains

Essay github: https://github.com/mbilalzafar/fair-classification

Essay : Maximizing accuracy under fairness constraints

## Import necessary library

In [1]:
import pandas as pd
import utils as ut
from sklearn.model_selection import train_test_split
import numpy as np
import loss_funcs as lf
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.calibration import calibration_curve
from sklearn.calibration import CalibratedClassifierCV
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from scipy.optimize import fmin_slsqp
import warnings

## Define Functions

In [2]:
def compute_p_rule(x_control, class_labels):

    """ Compute the p-rule based on Doctrine of disparate impact """

    non_prot_all = sum(x_control == 1.0) # non-protected group
    prot_all = sum(x_control == 0.0) # protected group
    non_prot_pos = sum(class_labels[x_control == 1.0] == 1.0) # non_protected in positive class
    prot_pos = sum(class_labels[x_control == 0.0] == 1.0) # protected in positive class
    frac_non_prot_pos = float(non_prot_pos) / float(non_prot_all)
    frac_prot_pos = float(prot_pos) / float(prot_all)
    p_rule = min((frac_non_prot_pos / frac_prot_pos) , (frac_prot_pos / frac_non_prot_pos))* 100.0

    return p_rule

def calculate_calibration_difference(model, X_test, y_test, sensitive_test, n_bins=10):

    # Predict the probabilities on the test set
    y_prob = model.predict_proba(X_test)[:, 1]  # Probabilities for the positive class

    # Get the indices for each group
    group1_idx = (sensitive_test == 1)
    group2_idx = (sensitive_test == 0)

    # Calculate the calibration curve for each group
    prob_true_group1, prob_pred_group1 = calibration_curve(y_test[group1_idx], y_prob[group1_idx], n_bins=n_bins, strategy='uniform')
    prob_true_group2, prob_pred_group2 = calibration_curve(y_test[group2_idx], y_prob[group2_idx], n_bins=n_bins, strategy='uniform')

    # Calculate the calibration difference
    calibration_difference = np.abs(prob_true_group1 - prob_true_group2)

    # Calculate the average calibration difference across bins
    average_calibration_difference = np.mean(calibration_difference)

    return average_calibration_difference

## Load and filter Dataset

In [3]:
warnings.filterwarnings("ignore")
# Load the dataset
file_path = '/content/compas-scores-two-years.csv'
compas_data = pd.read_csv(file_path)

# Getting attributes we want
selected_var=["sex","age","race","juv_fel_count","decile_score","juv_misd_count","juv_other_count","priors_count","c_days_from_compas","c_charge_degree","decile_score","v_decile_score","priors_count","two_year_recid"]

# Converting some of the features to be binary or categorical
compas_data=compas_data[selected_var]
filtered_data = compas_data[compas_data['race'].isin(['Caucasian', 'African-American'])]
filtered_data['race'] = filtered_data['race'].apply(lambda x: 1 if x == 'Caucasian' else 0)
filtered_data['class_label'] = filtered_data['two_year_recid'].apply(lambda x: 1 if x else 0)
filtered_data['sex'] = filtered_data['sex'].apply(lambda x: 1 if x == "Male" else 0)
filtered_data['c_charge_degree'] = filtered_data['c_charge_degree'].apply(lambda x: 1 if x == "M" else 0)
filtered_data.head(5)

# drop NaN data
filtered_data=filtered_data.dropna()

## Train test split

In [4]:
# Use train test split
filtered_data=filtered_data.drop(columns=["two_year_recid"])
y=filtered_data.class_label
x=filtered_data.drop(columns=["class_label","race"])
sensitive_attribute = filtered_data['race']
X_train, X_test, y_train, y_test, sensitive_train, sensitive_test = train_test_split(x, y, sensitive_attribute, test_size=0.3, random_state=42)

## Baseline: Logistic Regression

In [5]:
# Baseline: Logistic Regression
np.random.seed(110)
w = ut.train_model(X_train,
                   y_train,
                   x_control = {'race': sensitive_train},
                   loss_function = lf._logistic_loss,
                   apply_fairness_constraints = 0,
                   apply_accuracy_constraint = 0,
                   sep_constraint = 0,
                   sensitive_attrs = ['race'],
                   sensitive_attrs_to_cov_thresh = {'race': 0},
                   gamma = None)

In [6]:
print('Logistic Regression:')
warnings.filterwarnings("ignore")
model = LogisticRegression().fit(X_train, y_train)
lr_p_train = compute_p_rule(sensitive_train, model.predict(X_train))
lr_p_test = compute_p_rule(sensitive_test, model.predict(X_test))
lr_train_accuracy = model.score(X_train, y_train) * 100
lr_test_accuracy = model.score(X_test, y_test) * 100
lr_calibration = calculate_calibration_difference(model, X_test, y_test, sensitive_test, n_bins=10)

# Print out accuracies
print()
print("Accuracy (%)")
print("Training: {:.2f}%".format(lr_train_accuracy))
print("Test: {:.2f}%".format(lr_test_accuracy))
print()
print("p_rule (%)")
print("Training: {:.2f}%".format(lr_p_train))
print("Test: {:.2f}%".format(lr_p_test))
print()
print("Calibtraion (%)")
print("Test:{:.4f}%".format(lr_calibration))

Logistic Regression:

Accuracy (%)
Training: 68.01%
Test: 68.44%

p_rule (%)
Training: 53.20%
Test: 49.61%

Calibtraion (%)
Test:0.0577%


## Optimizing classifier accuracy subject to fairness constraints

In [7]:
# Optimizing classifier accuracy subject to fairness constraints
np.random.seed(150)
w = ut.train_model(X_train,
                   y_train,
                   x_control = {'race': sensitive_train},
                   loss_function = lf._logistic_loss,
                   apply_fairness_constraints = 1,
                   apply_accuracy_constraint = 0,
                   sep_constraint = 0,
                   sensitive_attrs = ['race'],
                   sensitive_attrs_to_cov_thresh = {'race': 0},
                   gamma = None)

In [8]:
# Feeding model with weights
m = LogisticRegression()
m.coef_= w.reshape((1,-1))
m.intercept_ = 0
m.classes_ = np.array([0, 1])

In [9]:
print('Logistic Regression with fairness constraints:')

warnings.filterwarnings("ignore")
LRFC_p_train = compute_p_rule(sensitive_train, m.predict(X_train))
LRFC_p_test = compute_p_rule(sensitive_test, m.predict(X_test))
LRFC_train_accuracy = m.score(X_train, y_train) * 100
LRFC_test_accuracy = m.score(X_test, y_test) * 100
LRFC_calibration = calculate_calibration_difference(m, X_test, y_test, sensitive_test, n_bins=10)

# Print out accuracies
print()
print("Accuracy (%)")
print("Training: {:.2f}%".format(LRFC_train_accuracy))
print("Test: {:.2f}%".format(LRFC_test_accuracy))
print()
print("p_rule (%)")
print("Training: {:.2f}%".format(LRFC_p_train))
print("Test: {:.2f}%".format(LRFC_p_test))
print()
print("Calibtraion (%)")
print("Test:{:.4f}%".format(LRFC_calibration))

Logistic Regression with fairness constraints:

Accuracy (%)
Training: 46.01%
Test: 48.02%

p_rule (%)
Training: 99.96%
Test: 99.91%

Calibtraion (%)
Test:0.2646%


## Optimizing classifier fariness subject to accuracy constraints

In [10]:
np.random.seed(110)
w = ut.train_model(X_train,
                   y_train,
                   x_control = {'race': sensitive_train},
                   loss_function = lf._logistic_loss,
                   apply_fairness_constraints = 0,
                   apply_accuracy_constraint = 1,
                   sep_constraint = 0,
                   sensitive_attrs = ['race'],
                   sensitive_attrs_to_cov_thresh = {'race': 0},
                   gamma = 0.8)

In [11]:
# Feeding model with coefficients and weights
m = LogisticRegression()
m.coef_= w.reshape((1,-1))
m.intercept_ = 0
m.classes_ = np.array([0, 1])

In [12]:
print('Logistic Regression with accuracy constraints:')

warnings.filterwarnings("ignore")
LRAC_p_train = compute_p_rule(sensitive_train, m.predict(X_train))
LRAC_p_test = compute_p_rule(sensitive_test, m.predict(X_test))
LRAC_train_accuracy = m.score(X_train, y_train) * 100
LRAC_test_accuracy = m.score(X_test, y_test) * 100
LRAC_calibration = calculate_calibration_difference(m, X_test, y_test, sensitive_test, n_bins=10)

# Print out accuracies
print()
print("Accuracy (%)")
print("Training: {:.2f}%".format(LRAC_train_accuracy))
print("Test: {:.2f}%".format(LRAC_test_accuracy))
print()
print("p_rule (%)")
print("Training: {:.2f}%".format(LRAC_p_train))
print("Test: {:.2f}%".format(LRAC_p_test))
print()
print("Calibtraion (%)")
print("Test:{:.4f}%".format(LRAC_calibration))

Logistic Regression with accuracy constraints:

Accuracy (%)
Training: 45.98%
Test: 47.96%

p_rule (%)
Training: 100.00%
Test: 100.00%

Calibtraion (%)
Test:0.1241%


## Result

In [16]:
models_data = [
    {'Model': 'Logitstic Regression', 'Accuracy_Train (%)': lr_train_accuracy, 'Accuracy_Test (%)': lr_test_accuracy, 'P_Rule_Train (%)': lr_p_train, 'P_Rule_Test (%)': lr_p_test, 'Calibration_Test (%)': lr_calibration},
    {'Model': 'LRC', 'Accuracy_Train (%)': LRFC_train_accuracy, 'Accuracy_Test (%)': LRFC_test_accuracy, 'P_Rule_Train (%)': LRFC_p_train, 'P_Rule_Test (%)': LRFC_p_test, 'Calibration_Test (%)': LRFC_calibration},
    {'Model': 'LRAC', 'Accuracy_Train (%)': LRAC_train_accuracy, 'Accuracy_Test (%)': LRAC_test_accuracy, 'P_Rule_Train (%)': LRAC_p_train, 'P_Rule_Test (%)': LRAC_p_test, 'Calibration_Test (%)': LRAC_calibration}
]

# Convert the list of dictionaries to a DataFrame
df = pd.DataFrame(models_data)

# Reorder DataFrame to have 'Model' column first
df = df[['Model', 'Accuracy_Train (%)', 'Accuracy_Test (%)', 'P_Rule_Train (%)', 'P_Rule_Test (%)', 'Calibration_Test (%)']]

# Display the DataFrame
print(df.to_string(index=False))

               Model  Accuracy_Train (%)  Accuracy_Test (%)  P_Rule_Train (%)  P_Rule_Test (%)  Calibration_Test (%)
Logitstic Regression           68.009313          68.441065         53.203465        49.607920              0.057688
                 LRC           46.006985          48.017382         99.961165        99.910072              0.264626
                LRAC           45.983702          47.963064        100.000000       100.000000              0.124113


Logistic Regression with constraints have really good p rule even their accuracy is not that high.

**We believe that logistic regression with constrains is better.**


We can notice that baseline(logistic regression) has highest Accuracy but low P_rule. This shows that inequity exists in BASELINE, which may result in serious consequences, especially on a topic like RACE.


As to whether to choose the constrain of ACCURACY or the constrain of FAIRNESS, you need to choose the model according to the actual situation. For example, like the current topic related to race, we think that a higher p-rule value can avoid the potential risk of racial discrimination.