In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.cluster import KMeans

In [14]:
def discrimination(data, target, sens, expl, max_corr=.1):
    # target
    # sens: sensitive attribute
    # expl: explanatory attribute(s), str or list
    group_priv = data[data[sens]==2]
    group_prot = data[data[sens]==1]
    n_priv = group_priv.shape[0]
    n_prot = group_prot.shape[0]
    
    D_all = np.sum(group_priv[target]==1)/n_priv - np.sum(group_prot[target] == 1)/n_prot
    print('Total discrimination: %.2f' % D_all)
    
    # multiple explanatory attributes
    if isinstance(expl, list):
#         high_corr = list(data.columns[np.abs(data.corr()[sens].sort_values()) > max_corr])
        high_corr = []
        for e in expl: 
            if e in high_corr: 
                print(e, 'is highly correlated with', sens)
        expl = [e for e in expl if e not in high_corr]
        data_expl = pd.Series(KMeans(n_clusters=6).fit(data[expl]).labels_)
    else:
        data_expl = data[expl]
    
    data_expl_priv = data_expl[data[sens]==2]
    data_expl_prot = data_expl[data[sens]==1]
        
    expl_values = data_expl.unique()
    D_expl = 0 
    
    for e_i in expl_values:
        P_star = (np.sum((group_priv[target]==1) & (data_expl_priv == e_i))/n_priv + 
                  np.sum((group_prot[target]==1) & (data_expl_prot == e_i))/n_prot)/2
        D_expl += (np.sum(data_expl_priv == e_i)/n_priv - np.sum(data_expl_prot == e_i)/n_prot) * P_star
        
    print('Discrimination explainable by %s: %.2f' % (', '.join(expl), D_expl))
    
    D_illegal = D_all - D_expl
    print('Unexplainable discrimination: %.2f' % D_illegal)
    
    return (D_all, D_expl, D_illegal)

Import data and ignore native workers

In [18]:
credit_df = pd.read_csv('./resampled_nation.csv')
credit_df.columns

legal = credit_df.columns[ [1, 3, 5, 6, 8, 10, 12, 14, 16 ] ]
maybe = credit_df.columns[ [1, 2, 3, 4, 5, 6, 8, 10, 11, 12, 14, 15, 16, 17 ] ]

male_idx = (credit_df['Sex & Marital Status']==1) | (credit_df['Sex & Marital Status']==3) | \
                 (credit_df['Sex & Marital Status']==4)
female_idx = (credit_df['Sex & Marital Status']==2) | (credit_df['Sex & Marital Status']==5)
native_idx = (credit_df['Foreign Worker']==2)
foreign_idx = (credit_df['Foreign Worker']==1)

# insert a column of gender, 1 female, 2 female
credit_df.insert(loc=len(credit_df.columns), column='gender', value=1)
credit_df.loc[male_idx, 'gender'] = 2

# remove rows of native workers
credit_df = credit_df[foreign_idx]
credit_df = credit_df.reset_index(drop=True)

# check the original discrimination w.r.t. gender after removing native workers
discrimination(credit_df, 'Creditability', 'gender', list(legal))

credit_df.corr()

Total discrimination: 0.12
Discrimination explainable by Account Balance, Payment Status of Previous Credit, Credit Amount, Value Savings/Stocks, Instalment per cent, Guarantors, Most valuable available asset, Concurrent Credits, No of Credits at this Bank: 0.04
Unexplainable discrimination: 0.08


Unnamed: 0,Creditability,Account Balance,Duration of Credit (month),Payment Status of Previous Credit,Purpose,Credit Amount,Value Savings/Stocks,Length of current employment,Instalment per cent,Sex & Marital Status,...,Most valuable available asset,Age (years),Concurrent Credits,Type of apartment,No of Credits at this Bank,Occupation,No of dependents,Telephone,Foreign Worker,gender
Creditability,1.0,0.305876,-0.356103,0.141212,-0.142146,-0.324871,0.212737,-0.049507,-0.031011,0.187233,...,-0.206193,0.023524,0.176702,0.009309,0.034464,-0.236427,0.130624,-0.19289,0.232325,0.123399
Account Balance,0.305876,1.0,-0.116006,-0.044355,-0.024172,-0.119262,0.288867,0.143971,0.083687,0.121248,...,0.096935,0.075975,0.116866,0.022006,-0.032084,-0.032639,0.046378,-0.025749,-0.082068,0.124164
Duration of Credit (month),-0.356103,-0.116006,1.0,-0.172924,0.358485,0.651721,0.005645,0.083694,0.071806,-0.192687,...,0.370965,-0.102406,-0.185463,0.176634,-0.042907,0.330988,-0.193504,0.301427,-0.386479,-0.207753
Payment Status of Previous Credit,0.141212,-0.044355,-0.172924,1.0,-0.17195,-0.202143,-0.027384,0.180951,0.002058,0.117104,...,-0.178548,0.247101,0.243325,0.005506,0.536339,-0.1104,0.18266,0.022065,0.042953,0.130734
Purpose,-0.142146,-0.024172,0.358485,-0.17195,1.0,0.294799,-0.066877,-0.01118,0.069136,-0.105845,...,0.110098,-0.053939,-0.13451,0.184029,0.022992,0.251789,-0.170353,0.223673,-0.332328,-0.173606
Credit Amount,-0.324871,-0.119262,0.651721,-0.202143,0.294799,1.0,0.026366,0.059581,-0.331996,-0.296957,...,0.239611,-0.006282,-0.186377,0.116305,-0.076812,0.408484,-0.121773,0.422815,-0.071254,-0.194529
Value Savings/Stocks,0.212737,0.288867,0.005645,-0.027384,-0.066877,0.026366,1.0,0.072897,-0.021828,0.037914,...,0.061505,-0.002578,0.133522,0.02545,-0.095014,-0.044503,-0.069238,0.015992,0.03313,-0.010618
Length of current employment,-0.049507,0.143971,0.083694,0.180951,-0.01118,0.059581,0.072897,1.0,0.074041,-0.057036,...,0.103137,0.323616,0.068742,0.079749,0.070239,0.142414,0.029483,0.129028,-0.066489,0.143591
Instalment per cent,-0.031011,0.083687,0.071806,0.002058,0.069136,-0.331996,-0.021828,0.074041,1.0,0.193468,...,0.167463,-0.003395,-0.07036,0.117943,0.025049,0.067896,-0.185256,-0.061908,-0.236161,0.13204
Sex & Marital Status,0.187233,0.121248,-0.192687,0.117104,-0.105845,-0.296957,0.037914,-0.057036,0.193468,1.0,...,-0.126155,-0.040897,0.039128,0.024166,0.086082,-0.242347,0.116797,-0.217303,0.196077,0.642956


In [None]:
Find the correct 