# Introduction
In this notebook we take a data set and try to solve a multiclass classification problem under the fairness constraint 'independence'. The data come from the fairlearn package (the Adult data set); we try to predict the age group of an individual 
from the remaining information. The sensitive feature is gender.

In [583]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import scipy.optimize

import fairlearn
from fairlearn.metrics import MetricFrame
from fairlearn.datasets import *

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,confusion_matrix
import lightgbm as lgb

In [584]:
# Load the Adult data set shipped with fairlearn and prepare it for classification.
data = fetch_adult(as_frame=True)
full_d = data['data']

# The income target becomes an ordinary binary feature called 'salary'.
salary_codes = {'<=50K': 0, '>50K': 1}
full_d['salary'] = data['target'].map(salary_codes)

# 'education-num' is removed; the 'education' column stays and is one-hot encoded below.
del full_d['education-num']

# One-hot encode the categorical columns, with an explicit NaN indicator per
# column and the first level of each column dropped.
dum_cols = [
    'workclass',
    'education',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'sex',
    'native-country',
]
full_d = pd.get_dummies(full_d, columns=dum_cols, dummy_na=True, drop_first=True)

# this function split numerical age into one of three age groups
def old(x):
    """Map a numerical age to an age-group code.

    Returns 0 for ages below 35, 1 for ages from 35 to 55 (both inclusive)
    and 2 for everything else.
    """
    if x < 35:
        return 0
    # From here on x is not below 35, so only the upper bound has to be checked.
    return 1 if x <= 55 else 2
# Replace the numerical age by its age-group code (0, 1 or 2).
# Note: this overwrites the column in place, so running the cell a second time
# would re-code the group codes themselves (all of them are < 35, i.e. group 0).
full_d['age'] = full_d['age'].apply(old)

In [585]:
# Randomly permute the rows: drawing as many rows as the frame holds, without
# replacement, is a shuffle. No random_state is set, so every run gives a
# different order.
number_observ = len(full_d)
full_d = full_d.sample(n=number_observ)

In [586]:
# The whole shuffled data set is used here (no subsampling is applied).
d = full_d

# Build the train and test parts.
# Naming note: `y` holds the feature columns and `x` the target (the age group);
# the rest of the notebook follows this convention.
y = d.drop(columns=['age'])
x = d['age']
y_train, y_test, x_train, x_test = train_test_split(y, x)

In [587]:
# How many observations fall into each age group.
d['age'].value_counts()

1    21585
0    21009
2     6248
Name: age, dtype: int64

In [588]:
# Accuracy of the baseline predictor that always answers the most frequent age group.
d['age'].value_counts().max() / len(d)

0.44193521968797345

In [589]:
# Fit a classifier that clearly beats the baseline predictor; its predictions
# and probabilities are the input of the fairness post-processing below.
lg = lgb.LGBMClassifier()
lg.fit(y_train, x_train)
lg_pred = lg.predict(y_test)

# Accuracy on the test part (true labels first, predictions second).
accuracy_score(x_test, lg_pred)

0.6862664810416838

# Fairness approach; training
In this section we train the fair classifier


In [590]:
# Split the train and test parts by the sensitive feature (the 'sex_Male' dummy)
# into male/female features and labels.
train_sex = y_train['sex_Male']
test_sex = y_test['sex_Male']

female_train_features = y_train[train_sex == 0]
male_train_features = y_train[train_sex == 1]
female_train_labels = x_train.loc[female_train_features.index]
male_train_labels = x_train.loc[male_train_features.index]

female_test_features = y_test[test_sex == 0]
male_test_features = y_test[test_sex == 1]
female_test_labels = x_test.loc[female_test_features.index]
male_test_labels = x_test.loc[male_test_features.index]

# f, m: number of females and males in the training set
f = len(female_train_features)
m = len(male_train_features)

In [591]:
# Accuracy of the unconstrained classifier, broken down by the sensitive feature.
mf = MetricFrame(
    metrics=accuracy_score,
    y_true=x_test,
    y_pred=lg_pred,
    sensitive_features=y_test['sex_Male'],
)
mf.by_group

sex_Male
0    0.708075
1    0.675544
Name: accuracy_score, dtype: object

In [592]:
# Class probabilities predicted by the classifier for the male and female parts
# of the training set. They are later used as the objective coefficients of the
# linear programs.
prob_columns = ['zero_class', 'first_class', 'second_class']
male_train_probs = pd.DataFrame(
    lg.predict_proba(male_train_features), columns=prob_columns)
female_train_probs = pd.DataFrame(
    lg.predict_proba(female_train_features), columns=prob_columns)

In [593]:
# Sizes used for every linear-programming sample.
# `group` is the square root of the training-set size; the male/female counts
# keep the gender proportions of the training set. Both counts are rounded
# down, so m_group + f_group can be smaller than group.
m_ratio = m / (m + f)
f_ratio = f / (m + f)
group = int(np.sqrt(m + f))
m_group = int(m_ratio * group)
f_group = int(f_ratio * group)

# Display the values so that the cell reproduces its recorded output.
m_ratio, f_ratio, group, m_group, f_group

(0.6678496355545849, 0.33215036444541507, 191, 127, 63)

In [594]:
#here we create function that converts 0-1 array to classes 0,1 and 2   
def zeros_ones_to_classes(x, length=3):
    """Convert a flat, block-wise score vector into class labels.

    ``x`` is read as consecutive blocks of ``length`` entries, one block per
    observation. The label of an observation is the position of the largest
    entry of its block (the first such position on ties). Trailing entries
    that do not fill a complete block are ignored.

    Parameters
    ----------
    x : array-like
        Scores, e.g. the (near) 0-1 solution vector of the linear program.
        Plain lists are accepted as well as numpy arrays.
    length : int, default 3
        Number of entries per observation, i.e. the number of classes.

    Returns
    -------
    numpy.ndarray of int
        One class label per complete block.
    """
    values = np.asarray(x)
    n = int(len(values) / length)
    if n == 0:
        # reshape(0, -1) is ambiguous for numpy, so the empty case is explicit
        return np.array([], dtype=int)
    # One row per observation; argmax over each row replaces the Python loop.
    blocks = values[:n * length].reshape(n, -1)
    return blocks.argmax(axis=1).astype(int)

In [596]:
# Empty containers that will collect the male and female random forests.
male_forest_array, female_forest_array = [], []

In [597]:
# here we solve linear programming problems and create a set of male/female random froest classifiers
%%time

# create parameters for linear progrmamms; their are the same for all samples
m = m_group
f = f_group

bounds = []
for i in range(3*m+3*f):
    bounds.append((0,1))

equation_vector = [1]*(m+f)
for i in range(3):
    equation_vector.append(0)

equation_matrix = np.zeros((m+f+3,3*f+3*m))
for i in range(f+m):
    equation_matrix[i,3*i] = 1
    equation_matrix[i,3*i+1] = 1
    equation_matrix[i,3*i+2] = 1
for i in range(3):
    for j in range(m):
        equation_matrix[f+m+i,3*j+i] = f
    for j in range(f):
        equation_matrix[f+m+i, 3*m+3*j+i]=-m
x= 5*group

#solving linear programm; each solution will result in one male and one female random forest
for k in range(x):
    male_sample = np.array(male_train_probs.sample(m_group))
    female_sample = np.array(female_train_probs.sample(f_group))
    C = male_sample.ravel()
    B = female_sample.ravel()
    objective = (-1)*np.concatenate((C,B))

    array = scipy.optimize.linprog(
        c = objective, A_ub=None, b_ub=None, 
                       A_eq=equation_matrix, 
                       b_eq=equation_vector, 
        bounds=bounds, method='highs-ipm', callback=None, options=None, x0=None).x

# finally create vectors of fair predictions
    fair_pred = zeros_ones_to_classes(array)
    fair_pred_male = fair_pred[:m]
    fair_pred_female = fair_pred[m:]

# here we prepare classes to relabeling
    mdf = pd.DataFrame(male_sample, columns = ['zero_class', 'first_class','second_class'])
    male_features_after_classif = mdf.copy()
    mdf['fair'] = fair_pred_male
    fdf = pd.DataFrame(female_sample, columns = ['zero_class', 'first_class','second_class'])
    female_features_after_classif = fdf.copy()
    fdf['fair'] = fair_pred_female

# create male and female random forest classifiers 
    m_rf = RandomForestClassifier()
    m_rf.fit(male_features_after_classif,mdf['fair'])
    f_rf = RandomForestClassifier()
    f_rf.fit(female_features_after_classif,fdf['fair']);
    male_forest_array.append(m_rf)
    female_forest_array.append(f_rf)

Wall time: 3min 42s


# Fairness approach: testing 
In this section we test the performance of the algorithm on the test set.

In [598]:
# Keep shallow copies of both forest lists; each has length 5*group.
male_forest_array_copy = list(male_forest_array)
female_forest_array_copy = list(female_forest_array)

In [599]:
# Male and female parts of the test set (features and labels), selected via the
# 'sex_Male' dummy column.
test_is_female = y_test['sex_Male'] == 0
test_is_male = y_test['sex_Male'] == 1

female_test_features = y_test[test_is_female]
male_test_features = y_test[test_is_male]
female_test_labels = x_test.loc[female_test_features.index]
male_test_labels = x_test.loc[male_test_features.index]

In [601]:
# Predicted classes and class probabilities of the unconstrained model,
# separately for the male and the female part of the test set.
val_male_probs = lg.predict_proba(male_test_features)
val_male_predictions = lg.predict(male_test_features)

val_female_probs = lg.predict_proba(female_test_features)
val_female_predictions = lg.predict(female_test_features)

In [602]:
# Dimensions of the male/female prediction matrices:
# *_rows = number of test individuals, *_cols = number of random forests.
val_male_index = male_test_features.index
val_female_index = female_test_features.index

male_rows, male_cols = len(val_male_index), len(male_forest_array)
female_rows, female_cols = len(val_female_index), len(female_forest_array)

In [604]:
%%time
# male matrices of predictions; each column is a result of applying one of male random forests on whole male-test set 
male_final_array = np.empty(shape = (male_cols,male_rows))
for i in range(male_cols):
    male_final_array[i] = male_forest_array[i].predict(val_male_probs)
male_final_array = pd.DataFrame(male_final_array)

Wall time: 52.1 s


In [605]:
%%time
# female matrices of predictions; each column is a result of applying one of female random forests on whole female-test set 
female_final_array = np.empty(shape = (female_cols,female_rows))
for i in range(female_cols):
    female_final_array[i] = female_forest_array[i].predict(val_female_probs)
female_final_array = pd.DataFrame(female_final_array)

Wall time: 34.9 s


In [606]:
# Build the answer lists: the final prediction for a test individual is the
# label that most of the random forests voted for.
def _majority_vote(votes):
    """Return the most frequent value of every column of ``votes`` as a list.

    ``votes`` is a 2-D array-like with one row per voter and one column per
    individual. Ties are resolved in favour of the smallest label. An input
    without any entries yields an empty list.
    """
    votes = np.asarray(votes)
    labels = np.unique(votes)  # sorted, so argmax below prefers the smallest label on ties
    if labels.size == 0:
        return []
    # counts[j, i] = number of voters that chose labels[j] for individual i
    counts = np.stack([(votes == label).sum(axis=0) for label in labels])
    return labels[counts.argmax(axis=0)].tolist()


male_final_ans = _majority_vote(male_final_array)
female_final_ans = _majority_vote(female_final_array)

In [607]:
# Male accuracy, female accuracy and their absolute difference (the unfairness)
# for the unconstrained model.
unfair_male_acc = accuracy_score(val_male_predictions, male_test_labels)
unfair_female_acc = accuracy_score(val_female_predictions, female_test_labels)
print(unfair_male_acc, unfair_female_acc, abs(unfair_male_acc - unfair_female_acc))

0.6755436110432446 0.7080745341614907 0.032530923118246124


In [608]:
# Male accuracy, female accuracy and their absolute difference (the unfairness)
# for the post-processed, fair model.
fair_male_acc = accuracy_score(male_final_ans, male_test_labels)
fair_female_acc = accuracy_score(female_final_ans, female_test_labels)
print(fair_male_acc, fair_female_acc, abs(fair_male_acc - fair_female_acc))

0.6705350598582946 0.7001242236024845 0.02958916374418985


In [609]:
# Record the unfairness of this trial (absolute male/female accuracy gap) for
# the unconstrained and for the fair model.
unfair_gap = abs(accuracy_score(val_male_predictions, male_test_labels)
                 - accuracy_score(val_female_predictions, female_test_labels))
fair_gap = abs(accuracy_score(male_final_ans, male_test_labels)
               - accuracy_score(female_final_ans, female_test_labels))

# `unfair` / `fair` collect one value per trial. They are created here when they
# do not exist yet (fresh kernel) and turned back into lists when they currently
# hold numpy arrays, so the cell fails with neither NameError nor AttributeError.
unfair = list(globals().get('unfair', []))
fair = list(globals().get('fair', []))
unfair.append(unfair_gap)
fair.append(fair_gap)

# Analysis of the Results

In [620]:
# Results of 18 trials, entered by hand; each trial takes approximately 6 minutes.
# Every entry is the absolute difference between male and female accuracy in one
# trial. The last entry of each array equals the gap printed by the run shown above.

# gaps of the unconstrained (unfair) model
unfair = np.array([0.04356870247481859,
 0.05430315270860586,
 0.029528792445345675,
 0.05546402471844358,
 0.02119821752307005,
 0.047011019238018426,
 0.045165563376309836,
 0.04287649481781064,
 0.02604227707563156,
 0.04022144595231869,
 0.03558647368699175,
 0.05275884645572859,
 0.025414585281194446,
 0.035362281125826844,
 0.03441691871812913,
 0.03188968658298774,
 0.01902837931208823,
 0.032530923118246124])

# gaps of the post-processed (fair) model, in the same trial order
fair = np.array([0.0363766390288528,
 0.0519313916748908,
 0.023111563864160245,
 0.05051368316609928,
 0.02577564874560523,
 0.03447997472073139,
 0.035409152816624156,
 0.03974337471362488,
 0.027791181655296482,
 0.026664714110636756,
 0.026553291411335445,
 0.050952305362546135,
 0.024845164949514165,
 0.02961086302534288,
 0.02863560910493712,
 0.02737896085731084,
 0.01679196613122691,
 0.02958916374418985])

In [623]:
# Summary statistics of the recorded gaps: sum, mean and variance,
# each given for the fair model first and the unfair model second.
(
    fair.sum(), unfair.sum(),
    fair.mean(), unfair.mean(),
    fair.var(), unfair.var(),
)

(0.5861546490829254,
 0.6723677846115658,
 0.03256414717127363,
 0.037353765811753654,
 9.503486227837096e-05,
 0.00011564399535887001)

After some trials it seems that this approach slightly improves the fairness measure 'independence' (reported here as the gap between male and female accuracy). The improvement is quite small, but it is there. The accuracy typically goes down a little after post-processing.