# Introduction
In this notebook we will take some data and try to solve multiclass classification problem with fairness constraint 'independence'. The data will be taken from fairlearn package; we will try to predict age group of an induvidual 
using his information. The sensitive feature will be gender.

In [1276]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import scipy.optimize

import fairlearn
from fairlearn.metrics import MetricFrame
from fairlearn.datasets import *

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,confusion_matrix
import lightgbm as lgb

In [1277]:
# take a data set from fairlearn and then prepare it for classification
data = fetch_adult(as_frame = True)
full_d = data['data']
full_d['salary'] = data['target'].map({'<=50K':0, '>50K':1})

del full_d['education-num']
dum_cols = ['workclass', 'education', 'marital-status', 'occupation', 'relationship','race','sex','native-country']
full_d = pd.get_dummies(full_d,columns = dum_cols,dummy_na = True, drop_first = True)

# this function split numerical age into one of three age groups
def old(x):
    if x <35: 
        return 0
    elif 35<= x<= 55:
        return 1
    else:
        return 2
full_d.age = full_d.age.apply(old)

In [1278]:
# randomly shuffle the data set to get more unbiased results
full_d = full_d.sample(full_d.shape[0])

In [1279]:
# we take 14000 first observations; more observations cannot be taken due the 
# complexity of the linear-programming problem
d = full_d[0:14000]

# create train and test observations
y = d.drop(['age'],axis=1)
x = d['age']
y_train,y_test,x_train,x_test = train_test_split(y,x)

In [1280]:
# distribution of age group
d.age.value_counts()

1    6306
0    5976
2    1718
Name: age, dtype: int64

In [1281]:
# accuracy of the baseline predictor
d.age.value_counts()[0]/14000

0.4268571428571429

In [1282]:
# we take some classifier that gives significant improvement against the baseline predictor and
# fit it on the data
lg = lgb.LGBMClassifier()
lg.fit(y_train,x_train)
lg_pred= lg.predict(y_test)
accuracy_score(lg_pred, x_test)

0.6845714285714286

# Fairness approach
In this section we train the fair classifier


In [1283]:
# here we create male/female train/test features/labels
female_train_features = y_train[y_train['sex_Male']==0]
male_train_features = y_train[y_train['sex_Male']==1]
female_train_labels = x_train[female_train_features.index]
male_train_labels = x_train[male_train_features.index]

female_test_features = y_test[y_test['sex_Male']==0]
male_test_features = y_test[y_test['sex_Male']==1]
female_test_labels = x_test[female_test_features.index]
male_test_labels = x_test[male_test_features.index]

# f,m stand for amount of females and males in the training set
f = female_train_features.shape[0]
m = male_train_features.shape[0]

In [1284]:
# look at the fairness results
mf= MetricFrame(metrics=accuracy_score, y_pred=lg_pred, y_true=x_test,sensitive_features = y_test['sex_Male'])
mf.by_group

sex_Male
0    0.712212
1    0.670674
Name: accuracy_score, dtype: object

In [1285]:
# here we create vectors of male/female probabilities. 
#Later on they will be passed as an input to the linear programming problem
male_train_probs = lg.predict_proba(male_train_features)
female_train_probs = lg.predict_proba(female_train_features)

In [1286]:
# here we create arguments that will be passed to the linear programming problem
m = male_train_probs.shape[0]
f = female_train_probs.shape[0]

C = male_train_probs.ravel()
B = female_train_probs.ravel()
objective = (-1)*np.concatenate((C,B))

bounds = []
for i in range(3*m+3*f):
    bounds.append((0,1))

equation_vector = [1]*(m+f)
for i in range(3):
    equation_vector.append(0)

equation_matrix = np.zeros((m+f+3,3*f+3*m))
for i in range(f+m):
    equation_matrix[i,3*i] = 1
    equation_matrix[i,3*i+1] = 1
    equation_matrix[i,3*i+2] = 1
for i in range(3):
    for j in range(m):
        equation_matrix[f+m+i,3*j+i] = f
    for j in range(f):
        equation_matrix[f+m+i, 3*m+3*j+i]=-m

In [1287]:
%%time

#solving linear programm; this is the most computationaly consuming part 
array = scipy.optimize.linprog(
    c = objective, A_ub=None, b_ub=None, 
                       A_eq=equation_matrix, 
                       b_eq=equation_vector, 
    bounds=bounds, method='highs-ipm', callback=None, options=None, x0=None).x
array 

Wall time: 12.6 s


array([1., 0., 0., ..., 0., 1., 0.])

In [1288]:
#here we create functions that convert 0-1 array obtained above to classes 0,1 and 2 
def slice_to_class(l):
    if np.array_equal(l,np.array([1,0,0])):
        return 0
    elif np.array_equal(l,np.array([0,1,0])):
        return 1
    elif np.array_equal(l,np.array([0,0,1])):
        return 2
def zeros_ones_to_classes(x,length = 3):
    x = np.around(x)
    n = int(len(x)/length)
    l = []
    for i in range(n):
        z = x[i*length:i*length+length]
        l.append(slice_to_class(z))
    return np.array(l, dtype=float)

In [1289]:
# finally create vectors of fair predictions
fair_pred = zeros_ones_to_classes(array)
fair_pred_male = fair_pred[:m]
fair_pred_female = fair_pred[m:]

In [1290]:
# the results on training set are fair now
print(accuracy_score(fair_pred_female,female_train_labels),accuracy_score(fair_pred_male,male_train_labels))

0.7615516257843696 0.7523591649985703


In [1291]:
# here we prepare classes to relabeling
mdf = pd.DataFrame(male_train_probs, columns = ['zero_class', 'first_class','second_class'])
male_features_after_classif = mdf.copy()
mdf['fair'] = fair_pred_male
fdf = pd.DataFrame(female_train_probs, columns = ['zero_class', 'first_class','second_class'])
female_features_after_classif = fdf.copy()
fdf['fair'] = fair_pred_female

In [1292]:
# create male and female random forest classifiers that will improve fairness
m_rf = RandomForestClassifier()
m_rf.fit(male_features_after_classif,mdf['fair'])
f_rf = RandomForestClassifier()
f_rf.fit(female_features_after_classif,fdf['fair']);

# Testing on different data

In this section we consider all other observations and validate our approach on them. We will take
some classifier (Light Gradient Boosting) and then relabel its predictions by the random forests obtained 
in the previous part.

In [1293]:
# take the rest of the data
validation_data = full_d[14000:]
validation_y = validation_data.drop(['age'],axis=1)
validation_x = validation_data['age']

In [1294]:
# fit and test LGBMClassifier on the validation data
validation_y_train,validation_y_test,validation_x_train,validation_x_test = train_test_split(validation_y,validation_x)
validation_lg = lgb.LGBMClassifier()
validation_lg.fit(validation_y_train,validation_x_train)
validation_lg_pred= validation_lg.predict(validation_y_test)
print(accuracy_score(validation_lg_pred, validation_x_test, normalize = True),
      accuracy_score(validation_lg_pred, validation_x_test, normalize = False))

0.6849959820916083 5967


In [1295]:
# take male and feale parts of the test set; we need this to evaluate fairness
validation_female_test_features = validation_y_test[validation_y_test['sex_Male']==0]
validation_male_test_features = validation_y_test[validation_y_test['sex_Male']==1]
validation_female_test_labels = validation_x_test[validation_female_test_features.index]
validation_male_test_labels = validation_x_test[validation_male_test_features.index]

In [1296]:
# here we compute accuracy on male and female parts; we also compute unfairness, i.e., the 
# diffrence between male accuracy and female accuracy. This quantity we will try to minimize. 
validation_male_test_ans = validation_lg.predict(validation_male_test_features)
validation_female_test_ans = validation_lg.predict(validation_female_test_features)
print(accuracy_score(validation_male_test_ans, validation_male_test_labels, normalize = True),
accuracy_score(validation_female_test_ans, validation_female_test_labels, normalize = True))
abs(accuracy_score(validation_male_test_ans, validation_male_test_labels, normalize = True)-
accuracy_score(validation_female_test_ans, validation_female_test_labels, normalize = True))

0.6735183281945872 0.7083188304907762


0.03480050229618892

Fairness post-processing

In [1297]:
# here we obtain probabilities for the test observations
validation_male_test_probs = validation_lg.predict_proba(validation_male_test_features)
validation_female_test_probs = validation_lg.predict_proba(validation_female_test_features)

In [1298]:
# apply random forests to relabel predictions
male_relabeled_pred = m_rf.predict(validation_male_test_probs)
female_relabeled_pred = f_rf.predict(validation_female_test_probs)

print(accuracy_score(male_relabeled_pred, validation_male_test_labels), 
accuracy_score(female_relabeled_pred, validation_female_test_labels))

# unfairness
abs(accuracy_score(male_relabeled_pred, validation_male_test_labels)-
accuracy_score(female_relabeled_pred, validation_female_test_labels))

0.6615279205207263 0.6950922380786634


0.033564317557937096

In [1299]:
# final accuracy;
print(accuracy_score(male_relabeled_pred, validation_male_test_labels, normalize = False)+
accuracy_score(female_relabeled_pred, validation_female_test_labels, normalize = False))

5859


In [1300]:
# here the results after 20 trials; 
# each number in fair array means fair error, i.e., the difference between male acuracy
# and female accuracy after post-processing; the numbers in unfair array means the same for
# the unfair predictions
fair = np.array([52,40,35,34,40,11,26,25,41,7,43,32,29,42,15,47,35,39,35,11])*(1/100)
unfair = np.array([46,34,44,51,36,11,39,35,30,21,52,37,39,44,22,53,40,37,45,22])*(1/100)

In [1301]:
fair.sum(), unfair.sum()

(6.389999999999999, 7.38)

# Conclusion
After some trials it seems that this approach improves fairness measure 'independence' a little bit. The improvement seems to be quite small but very probably it exists. 
The accuracy typically goes down a litlle bit after post-processing