# Baseline measures

Step1. Import packages

The sub-package used to compute the baseline measures is aif360.sklearn. This package allows users to apply the bias metrics on their own datasets. For more information, please refer to
https://github.com/Trusted-AI/AIF360/tree/master/aif360/sklearn.

In [1]:
import numpy as np
import pandas as pd
import random
from sklearn.preprocessing import LabelEncoder
!pip install 'aif360[OptimPreproc]' 

from sklearn.linear_model import LogisticRegression, LogisticRegressionCV, SGDClassifier
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from aif360.sklearn.metrics import consistency_score,generalized_entropy_error,generalized_entropy_index,theil_index,coefficient_of_variation
from aif360.sklearn.metrics import statistical_parity_difference,disparate_impact_ratio,equal_opportunity_difference,average_odds_difference
from aif360.sklearn.datasets import standardize_dataset, to_dataframe
from sklearn.neighbors import NearestNeighbors, KNeighborsClassifier

Installing collected packages: slicer, shap, memory-profiler, tempeh, aif360
Successfully installed aif360-0.4.0 memory-profiler-0.60.0 shap-0.40.0 slicer-0.0.7 tempeh-0.1.12


Preprocess dataset

In [59]:
# Load the German credit data: space-separated, no header row, '?' marks missing values.
df = pd.read_csv('german.data', na_values='?', header=None, sep=' ')
cols = ['Status_of_existing_checking_account','Duration_in_month', 'Credit_history', 'Purpose', 'Credit_amount', 'Savings_accountbonds', 'Present_employment_since', 'Installment_rate_in_percentage_of_disposable_income', 'Personal_status_and_sex', 'Other_debtorsguarantors', 'Present_residence_since', 'Property', 'Age_in_years', 'Other_installment_plans', 'Housing', 'Number_of_existing_credits_at_this_bank', 'Job', 'Number_of_people_being_liable_to_provide_maintenance_for', 'Telephone', 'Foreign_worker', 'Creditworthiness']
df.columns = cols

# Keep an untouched copy of the raw values. The min-max scaling below overwrites
# the numeric columns in place, and the age discretization performed for the
# group fairness metrics reads the original Age_in_years range from `df_raw`.
df_raw = df.copy()

# Since the numeric variable 'Number_of_people_being_liable_to_provide_maintenance_for' is dichotomous, it's going to be treated as a nominal variable.
df['Number_of_people_being_liable_to_provide_maintenance_for'] = df['Number_of_people_being_liable_to_provide_maintenance_for'].astype('object')

# specify numeric and nominal columns (boolean masks aligned with df.columns)
nominal = [df[col].dtype == 'object' for col in df]
numeric = [not is_nominal for is_nominal in nominal]

# normalize numeric variables to [0, 1] (min-max scaling);
# the target is the last numeric column and is excluded from scaling
feature_cols = df.columns[numeric][:-1]
num = df[feature_cols].values
col_min = np.min(num, axis=0)
col_range = np.max(num, axis=0) - col_min
scaled = (num - col_min) / col_range
df[feature_cols] = pd.DataFrame(scaled, columns=feature_cols)

# recode 'Personal_status_and_sex' based on AIF360's preprocessing:
# code 'A92' is mapped to 'female', every other code to 'male'
df['Personal_status_and_sex'] = np.where(df['Personal_status_and_sex'] == 'A92', 'female', 'male')

# label encode nominal variables (the encoder is refit for every column)
lb = LabelEncoder()
for col in df.columns[nominal]:
    df[col] = lb.fit_transform(df[col])

Step2. Preprocess dataset based on AIF360's guidelines and Initialize objects.

For more information about preprocessing please refer to https://aif360.readthedocs.io/en/latest/modules/generated/aif360.sklearn.datasets.standardize_dataset.html#aif360.sklearn.datasets.standardize_dataset.  

In [60]:
# Split df into the feature frame X and the target y following the
# aif360.sklearn conventions; both protected attributes are declared.
X, y = standardize_dataset(
    df,
    prot_attr=['Personal_status_and_sex', 'Age_in_years'],
    target='Creditworthiness',
)

Step 3. Compute individual and group fairness baseline measures

**Individual fairness metrics**:
- Consistency score: measures how similar the labels are for similar instances
- Generalised entropy error: measures inequality over a population. This metric compares the predictions made by a classifier with the ground truth. To that end, a LogisticRegression model is used. Note that neither a train-test split nor hyperparameter tuning is performed: the model is fit and evaluated on the full dataset.

First, we compute measures using all attributes in the dataset. 

In [61]:
# Scenario configuration.
# Supported dataset names: German, Compas, Titanic, Synthetic3
dataset_name = 'German'
target = 'Creditworthiness'
prot1, prot2 = 'Personal_status_and_sex', 'Age_in_years'
pos_label = 1

# Result accumulators, one entry per scenario:
#   dataset             -> scenario names
#   consistency         -> consistency scores before and after excluding protected features
#   generalized_entropy -> GEE before and after excluding protected features
dataset, consistency, generalized_entropy = [], [], []

In [62]:
# Consistency score for three scenarios: all attributes, then excluding each
# protected attribute in turn. Every scenario is (name suffix, extra keyword
# arguments for standardize_dataset); the first one passes no `dropcols` at all
# so the library default applies.
scenarios = [
    ('_all_attributes', {}),
    ('_excl_' + prot1, {'dropcols': [prot1]}),
    ('_excl_' + prot2, {'dropcols': [prot2]}),
]

for suffix, extra_kwargs in scenarios:
    name = dataset_name + suffix
    dataset.append(name)

    X, y = standardize_dataset(df, prot_attr=[prot1, prot2], target=target,
                               **extra_kwargs)
    y = y.astype('float64')
    consistency.append(consistency_score(X, y))

    # Reference 5-NN classifier; it is not used by the score itself and is only
    # kept for optional inspection. Class labels are cast to int64 in every
    # scenario (previously only the first scenario did so).
    neigh = KNeighborsClassifier(n_neighbors=5).fit(X, y.astype('int64'))
    #print(neigh.score(X, y.astype('int64')))

In [None]:
# Generalized Entropy Error for the same three scenarios, in the same order:
# all attributes, excluding prot1, excluding prot2. Each entry holds the extra
# keyword arguments handed to standardize_dataset (none for the first scenario).
gee_scenarios = (
    {},
    {'dropcols': [prot1]},
    {'dropcols': [prot2]},
)

for extra_kwargs in gee_scenarios:
    X, y = standardize_dataset(df, prot_attr=[prot1, prot2], target=target,
                               **extra_kwargs)
    y = y.astype('float64')

    # The model is fit and evaluated on the same data (no train/test split).
    model = LogisticRegression(max_iter=1000, random_state=1)
    model.fit(X, y)
    y_pred = model.predict(X)
    #print(model.score(X,y))

    gee = generalized_entropy_error(y, y_pred, pos_label=pos_label)
    generalized_entropy.append(gee)

Second, we exclude each protected attribute (gender, then age) from the dataset in turn and compute the measures once more. The table below collects the results for all three scenarios.

In [64]:
# Combine the two individual fairness measures into one table, one row per
# scenario, rounded to three decimals. `axis` is passed by keyword: the
# positional form is deprecated and rejected by recent pandas versions.
baseline = pd.concat(
    [
        pd.Series(consistency, name='Consistency').round(3),
        pd.Series(generalized_entropy, name='GEE').round(3),
    ],
    axis=1,
)
baseline.index = dataset
baseline

In a future version of pandas all arguments of concat except for the argument 'objs' will be keyword-only


Unnamed: 0,Consistency,GEE
German_all_attributes,0.748,0.093
German_excl_Personal_status_and_sex,0.745,0.096
German_excl_Age_in_years,0.747,0.093


## Group Fairness

**Group fairness metrics**:
- Statistical parity difference
- Disparate impact
- Equal opportunity difference
- Average odds difference

In [75]:
# Scenario configuration for the group fairness metrics.
dataset_name = 'German'
target = 'Creditworthiness'
prot1, prot2 = 'Personal_status_and_sex', 'Age_in_years'
pos_label = 1

# Result accumulators, one entry per protected attribute:
#   dataset  -> scenario label
#   stat_par -> statistical parity difference
#   disp_im  -> disparate impact ratio
#   eq_opp   -> equal opportunity difference
#   ave_odds -> average odds difference
dataset, stat_par, disp_im, eq_opp, ave_odds = [], [], [], [], []

Group fairness metrics require numeric features to be discretized. Based on the literature, 'Age' is discretized in the following manner: people older than 25 years are 'old' (0) and people aged 25 or younger are 'young' (1).

In [76]:
# preprocess data following aif360.sklearn instructions
X, y = standardize_dataset(df, prot_attr=[prot1, prot2], target=target)
y = y.astype('float')

# discretize age
# `df` holds min-max scaled ages, so the original years are rebuilt from the
# raw range stored in `df_raw` (which must hold the un-normalized data).
# The result is rounded to whole years: the scale/unscale round trip is done in
# floating point and can land a hair above or below the threshold, which would
# put someone aged exactly 25 in the wrong group.
age_min = df_raw.Age_in_years.min()
age_range = df_raw.Age_in_years.max() - age_min
age_in_years = np.rint(df.Age_in_years * age_range + age_min)

# older than 25 -> 0 ('old'), otherwise -> 1 ('young'); only for German credit.
# Values are assigned positionally, so X is expected to keep the row order of df.
X['Age_in_years'] = np.where(age_in_years.values > 25, 0, 1)

# The model is fit and evaluated on the same data (no train/test split).
model = LogisticRegression(max_iter=1000, random_state=1)
model.fit(X, y)
y_pred = model.predict(X)

We compute the four group fairness measures by setting the `prot_attr` parameter to the name of the protected attribute.

First, we compute the metrics focusing on gender. `priv_group` is 1, i.e. males.

In [77]:
# Group fairness metrics for gender; the privileged group is 1.
# `prot_attr` is passed by keyword in every call so that all four metrics are
# invoked the same way (and do not depend on the positional parameter order).
dataset.append('Personal_status_and_sex/female')
stat_par.append(statistical_parity_difference(y, y_pred, prot_attr=prot1, pos_label=pos_label, priv_group=1))
disp_im.append(disparate_impact_ratio(y, y_pred, prot_attr=prot1, pos_label=pos_label, priv_group=1))
eq_opp.append(equal_opportunity_difference(y, y_pred, prot_attr=prot1, pos_label=pos_label, priv_group=1))
ave_odds.append(average_odds_difference(y, y_pred, prot_attr=prot1, pos_label=pos_label, priv_group=1))

Second, we compute the metrics focusing on age. `priv_group` is 0, i.e. people older than 25 years old.

In [78]:
# Group fairness metrics for age; the privileged group is 0.
# The keyword arguments shared by all four metric calls are collected once.
age_kwargs = {'prot_attr': prot2, 'pos_label': pos_label, 'priv_group': 0}

dataset.append('Age_in_years/young')
stat_par.append(statistical_parity_difference(y, y_pred, **age_kwargs))
disp_im.append(disparate_impact_ratio(y, y_pred, **age_kwargs))
eq_opp.append(equal_opportunity_difference(y, y_pred, **age_kwargs))
ave_odds.append(average_odds_difference(y, y_pred, **age_kwargs))

Finally, we merge the two.

In [79]:
# One row per protected-attribute scenario, one column per group fairness metric.
metric_names = ['Statistical Parity', 'Disparate Impact',
                'Equal Opportunity', 'Average Odds']
metric_values = np.column_stack([stat_par, disp_im, eq_opp, ave_odds])
pd.DataFrame(metric_values, columns=metric_names, index=dataset)

Unnamed: 0,Statistical Parity,Disparate Impact,Equal Opportunity,Average Odds
Personal_status_and_sex/female,-0.135624,0.83319,-0.052094,-0.134458
Age_in_years/young,-0.229459,0.770541,-0.107296,-0.311173
