# Baseline measures

Step1. Import packages

The sub-package used to compute the baseline measures is aif360.sklearn. This package allows users to apply the bias metrics on their own datasets. For more information, please refer to
https://github.com/Trusted-AI/AIF360/tree/master/aif360/sklearn.

In [3]:
import numpy as np
import pandas as pd
import random
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import LabelEncoder
!pip install 'aif360[OptimPreproc]' 

from sklearn.linear_model import LogisticRegression, LogisticRegressionCV, SGDClassifier
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from aif360.sklearn.metrics import consistency_score,generalized_entropy_error,generalized_entropy_index,theil_index,coefficient_of_variation
from aif360.sklearn.metrics import statistical_parity_difference,disparate_impact_ratio,equal_opportunity_difference,average_odds_difference
from aif360.sklearn.datasets import standardize_dataset, to_dataframe
from sklearn.neighbors import NearestNeighbors, KNeighborsClassifier

Preprocess dataset

In [49]:
df_raw = pd.read_csv('german.data', sep=' ', header=None, names = cols)

cols = ['Status_of_existing_checking_account','Duration_in_month', 
        'Credit_history', 'Purpose', 'Credit_amount', 'Savings_accountbonds', 
        'Present_employment_since', 'Installment_rate_in_percentage_of_disposable_income', 
        'Personal_status_and_sex', 'Other_debtorsguarantors', 'Present_residence_since', 
        'Property', 'Age_in_years', 'Other_installment_plans', 'Housing', 
        'Number_of_existing_credits_at_this_bank', 'Job', 'Number_of_people_being_liable_to_provide_maintenance_for', 
        'Telephone', 'Foreign_worker', 'Creditworthiness']
df_raw.columns = cols
df_raw.sort_values('Creditworthiness', kind='stable', inplace=True,ignore_index=True)

# Since the numeric variable 'Number_of_people_being_liable_to_provide_maintenance_for' is dichotomous, it's going to be treated as a nominal variable.
df_raw['Number_of_people_being_liable_to_provide_maintenance_for'] = df_raw['Number_of_people_being_liable_to_provide_maintenance_for'].astype('object')
#df_raw['Creditworthiness'] = df_raw['Creditworthiness'].astype('object')

# Separate nominal and numeric columns
nom_cols = df_raw.select_dtypes(include='object').columns
num_cols = df_raw.select_dtypes(exclude='object').columns

# Recode nominal variables
cleanup_nums = {"Status_of_existing_checking_account": {"A11":0, "A14":3, "A12":1, "A13":2},
                "Credit_history": {"A34":4, "A32":2, "A30":0, "A31":1, "A33":3},
                "Purpose": {"A43":3, "A46":6, "A42":2, "A41":1, "A40":0, "A49":9, "A44":4, "A45":5, "A410":10, "A48":8},
                "Savings_accountbonds": {"A65":5, "A61":1, "A63":3, "A64":4, "A62":2},
                "Present_employment_since": {"A75":5, "A74":4, "A73":3, "A72":2, "A71":1},
                "Personal_status_and_sex": {"A93":1, "A91":1, "A92":2, "A94":1},
                "Other_debtorsguarantors": {"A101":1, "A103":3, "A102":2},
                "Property": {"A121":1, "A122":2, "A124":4, "A123":3},
                "Other_installment_plans": {"A143":3, "A141":1, "A142":2},
                "Housing": {"A152":2, "A153":3, "A151":1},
                "Job": {"A173":3, "A172":2, "A174":4, "A171":1},
                "Telephone": {"A192":2, "A191":1},
                "Foreign_worker": {"A201": 1, "A202":2},
                "Creditworthiness": {1: 0, 2: 1}}
df_nom = df_raw.loc[:,nom_cols] 
df_nom.replace(cleanup_nums, inplace=True)
df_nom = df_nom.astype('object')

# Min-max normalize numeric variables
df_num = df_raw.loc[:,num_cols]
scaled=np.subtract(df_num.values,np.min(df_num.values,axis=0))/np.subtract(np.max(df_num.values,axis=0),np.min(df_num.values,axis=0))
df_num = pd.DataFrame(scaled, columns=df_num.columns)

df = pd.concat([df_nom,df_num],axis=1)

Step2. Import and preprocess dataset. Initialize objects.

Attribute 8 is *Gender* and attribute 12 is *Age*. For more information about preprocessing please refer to https://aif360.readthedocs.io/en/latest/modules/generated/aif360.sklearn.datasets.standardize_dataset.html#aif360.sklearn.datasets.standardize_dataset.  

In [51]:
# preprocess data following aif360.sklearn instructions
X,y = standardize_dataset(df,prot_attr=['Personal_status_and_sex','Age_in_years'], target = 'Creditworthiness')

Step3. Compute individal and group fairness baseline measures

**Individual fairness metrics**:
- Consistency score: measures how similar the labels are for similar instances
- Generalised entropy error: measures inequality over a population. This algorithm compares the predictions made by a classifier with the ground truth. To that end, a LogisticRegression is used. Note that no test-train split is made as well as no hyperparameter tuning. 

First, we compute measures using all attributes in the dataset. 

In [52]:
#German, Compas, Titanic, Synthetic3
dataset_name = 'German' 
prot1 =  'Personal_status_and_sex'
prot2 = 'Age_in_years' 
target = 'Creditworthiness'
pos_label = 0

# initialize objects
dataset = [] # scenario
consistency = [] 
generalized_entropy = []

In [53]:
name = dataset_name+'_all_attributes'
dataset.append(name) # 

X,y = standardize_dataset(df,prot_attr=[prot1,prot2],target=target)
y = y.astype('float64')
consistency.append(consistency_score(X, y))
neigh = KNeighborsClassifier(n_neighbors=5).fit(X, y.astype('int64'))
#print(neigh.score(X,y.astype('int64')))


# excl prot1
name = dataset_name+'_excl_'+prot1
dataset.append(name)
X,y = standardize_dataset(df,prot_attr=[prot1,prot2],dropcols=[prot1],target=target)
y = y.astype('float64')

consistency.append(consistency_score(X, y))
neigh = KNeighborsClassifier(n_neighbors=5).fit(X, y)
#print(neigh.score(X,y))

name = dataset_name+'_excl_'+prot2
dataset.append(name) #German_excl_gender, Compas_excl_race, Titanic_excl_pclass

# excl prot2
X,y = standardize_dataset(df,prot_attr=[prot1,prot2],
                          dropcols=[prot2],target=target)
y = y.astype('float64')

consistency.append(consistency_score(X, y))
neigh = KNeighborsClassifier(n_neighbors=5).fit(X, y)
#print(neigh.score(X,y))

In [54]:
# full
X,y = standardize_dataset(df,prot_attr=[prot1,prot2],target=target)
y = y.astype('float64')

model = LogisticRegression(max_iter=1000,random_state=1).fit(X,y)
y_pred = model.predict(X)
#print(model.score(X,y))

generalized_entropy.append(generalized_entropy_error(y, y_pred,pos_label=pos_label))

# excl. prot1
X,y = standardize_dataset(df,prot_attr=[prot1,prot2],dropcols=[prot1],target=target)
y = y.astype('float64')
model = LogisticRegression(max_iter=1000,random_state=1)
model.fit(X,y)
y_pred = model.predict(X)
#print(model.score(X,y))

generalized_entropy.append(generalized_entropy_error(y, y_pred,pos_label=pos_label))

# excl. prot1
X,y = standardize_dataset(df,prot_attr=[prot1,prot2],dropcols=[prot2],target=target)
y = y.astype('float64')
model = LogisticRegression(max_iter=1000,random_state=1)
model.fit(X,y)
y_pred = model.predict(X)
#print(model.score(X,y))

generalized_entropy.append(generalized_entropy_error(y, y_pred,pos_label=pos_label))

Second, we exclude the attribute gender from the dataset and compute measures once more.

In [55]:
baseline = pd.concat((np.round(pd.Series(consistency, name='Consistency'),3),np.round(pd.Series(generalized_entropy, name='GEE'),3)),1)
baseline.index = dataset
baseline

Unnamed: 0,Consistency,GEE
German_all_attributes,0.746,0.094
German_excl_Personal_status_and_sex,0.743,0.095
German_excl_Age_in_years,0.746,0.093


## Group Fairness

**Group fairness metrics**:
- Statistical parity difference
- Disparate impact
- Equal opportunity difference
- Average odds difference

In [152]:
dataset_name = 'German' 
prot1 =  'Personal_status_and_sex' 
prot2 = 'Age_in_years' 
target = 'Creditworthiness' 
pos_label = 0

# initialize objects
dataset = [] # scenario
stat_par = [] 
disp_im = []
eq_opp = []
ave_odds = []

In [153]:
# preprocess data following aif360.sklearn instructions
X,y = standardize_dataset(df,prot_attr=[prot1,prot2],target=target)
y = y.astype('float')

age_in_years = df.Age_in_years * (df_raw.Age_in_years.max() - df_raw.Age_in_years.min()) + df_raw.Age_in_years.min()
X['Age_in_years'] = age_in_years.values
X.Age_in_years = np.where(X.Age_in_years>25,int(0),int(1)) # only for German credit

model = LogisticRegression(max_iter=1000,random_state=1)
model.fit(X,y)
y_pred = model.predict(X)

We compute the four group fairness measures by setting `prot_attr` parameter to the index of the protected attribute.

First, we compute the metrics focusing on gender. `priv_group` is 1, i.e. males.

In [154]:
dataset.append('Personal_status_and_sex/female')
stat_par.append(statistical_parity_difference(y,y_pred,prot_attr=prot1,pos_label=pos_label,priv_group=1))
disp_im.append(disparate_impact_ratio(y,y_pred,prot_attr=prot1,pos_label=pos_label,priv_group=1))
eq_opp.append(equal_opportunity_difference(y,y_pred,prot1,pos_label=pos_label,priv_group=1))
ave_odds.append(average_odds_difference(y,y_pred,prot1,pos_label=pos_label,priv_group=1))

Second, we compute the metrics focusing on age. `priv_group` is 0, i.e. people older than 25 years old.

In [155]:
dataset.append('Age_in_years/young')
stat_par.append(statistical_parity_difference(y,y_pred,prot_attr=prot2,pos_label=pos_label,priv_group=0)) 
disp_im.append(disparate_impact_ratio(y,y_pred,prot_attr=prot2,pos_label=pos_label,priv_group=0))
eq_opp.append(equal_opportunity_difference(y,y_pred,prot_attr=prot2,pos_label=pos_label,priv_group=0))
ave_odds.append(average_odds_difference(y,y_pred,prot_attr=prot2,pos_label=pos_label,priv_group=0))

Finally, we merge the two.

In [156]:
pd.DataFrame(np.array([stat_par, disp_im, eq_opp, ave_odds]).T, 
             columns = ['Statistical Parity', 'Disparate Impact', 
             'Equal Opportunity', 'Average Odds'], index = dataset)

Unnamed: 0,Statistical Parity,Disparate Impact,Equal Opportunity,Average Odds
Personal_status_and_sex/female,-0.135297,0.834183,-0.056102,-0.131875
Age_in_years/young,-0.226453,0.773547,-0.104435,-0.30807
