In [None]:
import time
import openpyxl
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.colors import ListedColormap
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from scipy.optimize import minimize
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score
import wntr

# Data loading

The data frame *df_leaks* holds pressure measurements for different times (along rows) and different sensors (along columns '3', '10', '23', '25', '13', '14', '22', '29'). Moreover, the columns 'y_group1', 'y_group2' and 'y_group3' hold the sensitive features: binary labels telling us whether (1) or not (0) a leak is active at that time in group $j$, for $j = 1,2,3$. Additionally, the column 'y' holds the overall binary label telling us whether (1) or not (0) a leak is active anywhere in the water distribution network (WDN).

The data frame *df_information* holds information about the leaks appearing in *df_leaks*. Each leak setting has the main characteristics 'node ID' and 'diameter'. The data *df_leaks* is generated in such a way that for each node in the WDN and each diameter 5, 10 and 15cm, there exists a period of time where a leak, defined by its location and size, is simulated. *df_information* holds information about each such setting (along rows), such as 

- 'group' (areal group to which the leaky node belongs),
- 'node ID' (location of the leak),
- 'diameter' (size of the leak),
- 'setting start ID' (time index in the *df_leaks* at which the setting starts),
- 'leak start ID' (time index in the *df_leaks* at which the leak starts),
- 'leak end ID' (time index in the *df_leaks* at which the leak ends),
- 'setting end ID' (time index in the *df_leaks* at which the setting ends)

(along columns).

The data frame *df_noleaks* has the same structure as *df_leaks* and is used as a comparison data set. It contains no leaks at any time, i.e., in any row.

In [None]:
# Simulated pressure data with leaks; the first spreadsheet column becomes the index.
df_leaks = pd.read_excel(
    '../2_DataGeneration/Hanoi/data_leaks.xlsx',
    sheet_name='leaks',
    index_col=0,
)
# Leak-free comparison data, read the same way.
df_noleaks = pd.read_excel(
    '../2_DataGeneration/Hanoi/data_noleaks.xlsx',
    sheet_name='noleaks',
    index_col=0,
)
# One row per leak setting; node IDs are read as strings.
df_information = pd.read_excel(
    '../2_DataGeneration/Hanoi/information_leaks.xlsx',
    sheet_name='information',
    index_col=0,
    dtype={'node ID': str},
)

In [None]:
# Inspect the leak data set (sensor pressures plus the label columns).
df_leaks

In [None]:
# Inspect the leak-free comparison data set.
df_noleaks

In [None]:
# Inspect the table describing each simulated leak setting.
df_information

# Pipeline definition

In [None]:
# Execute the companion notebook. The helper classes and functions used below
# (e.g. MultiRegression, the ETC_* classifiers, evaluate, plot_*) are not defined
# in this notebook and are presumably provided by it.
%run ./FairnessExploration_PipelineDefinition.ipynb

# Pipeline application

## Variables

In [None]:
# time ID from which on the data is used
# (earlier rows are used, if at all, only for preprocessing)
time_start = 100
# window length passed to the rolling-mean preprocessor
time_wind = 3
# classifier classes used per sensor node
# (not defined in this notebook; presumably provided by the pipeline notebook)
classifier = ThresholdClassification
classifier_approx = ThresholdClassificationApproximation

We test different combinations of sensors actually used:

In [None]:
# Node ID of every leak setting listed in df_information (one entry per row).
node_ids = df_information['node ID'].tolist()
# The 1st, 2nd and 4th column of df_leaks are used as sensors.
sensor_ids = df_leaks.columns[[0, 1, 3]].tolist()
print('Given sensors:', sensor_ids)

# Columns 8, 9 and 10 (0-based) of df_leaks serve as sensitive features.
sensitive_features = df_leaks.columns[8:11].tolist()
print('Given sensitive features:', sensitive_features)

## Visualization - Network

In [None]:
# Load the Hanoi network model from its EPANET .inp file and plot it
# together with the leak nodes and the selected sensors (figure is not saved).
wn_hanoi = wntr.network.WaterNetworkModel('../1_FeatureGeneration/models/Hanoi.inp') 

plot_network_Hanoi(node_ids=node_ids,
                   sensor_ids=sensor_ids,
                   df_information=df_information,
                   wn=wn_hanoi,
                   name='Hanoi',
                   save_figs=False)

## Preprocessing for Regression

In [None]:
# Regression inputs: the selected sensor columns passed through the
# rolling-mean preprocessor (configured with time_start and time_wind).
preprocessor = Preprocessing_RollingMean(time_start=time_start,
                                         time_wind=time_wind)
X_pre = preprocessor.transform(df_leaks.loc[:,sensor_ids])
# Regression targets: the untransformed sensor columns from time_start on
# (label-based slice, so time_start itself is included).
Y_pre = df_leaks.loc[time_start:,sensor_ids]

In [None]:
#X_pre

In [None]:
#Y_pre

## Visualization - True Pressure

In [None]:
# Plot the measured pressure of the selected sensors for time IDs 128000-129000.
# NOTE(review): "Y_pred" in the comment below presumably means Y_pre — confirm.
plot_data_per_timeindex(df_leaks, # same as using Y_pred
                        sensor_ids=sensor_ids,
                        start_ids=[128000],
                        end_ids=[129000],
                        #thresholds={'3':0.5, '10':0.2, '23':1, '25':0.5},
                        show_legend=True)

In [None]:
# Compare leak-free and (potentially) leaky pressure for time IDs 128000-129000;
# the dict keys are the labels handed to the plotting helper.
# NOTE(review): "Y_pred" in the comment below presumably means Y_pre — confirm.
plot_data_per_timeindex_and_sensor(dfs={'pressure with no leak':df_noleaks,
                                        'pressure with potentially leak':df_leaks}, # same as using Y_pred
                                        #'predicted pressure':..},
                                   sensor_ids=sensor_ids,
                                   start_ids=[128000],
                                   end_ids=[129000],
                                   #thresholds={'3':0.5, '10':2, '23':0.2, '25':0.5},
                                   #threshold_key='pressure with potentially leak',
                                   show_legend=True)

In [None]:
# Plot the measured pressure for the leak settings at nodes '2', '5' and '16'
# with diameter 10 (settings are looked up via df_information).
# NOTE(review): "Y_pred" in the comment below presumably means Y_pre — confirm.
plot_data_per_setting(df_leaks, # same as using Y_pred
                      df_information=df_information,
                      sensor_ids=sensor_ids,
                      node_ids=['2','5','16'],
                      diameters=[10],
                      setting_ids=None,
                      #thresholds={'3':0.5, '10':2, '23':0.2, '25':0.5},
                      time_puffer=100,
                      show_legend=True,
                      zoom_leak=True,
                      print_report=False)

In [None]:
# Compare leak-free and leaky pressure for the leak settings at nodes
# '2', '5' and '16' with diameter 10, passing fixed example thresholds.
# NOTE(review): the thresholds dict has a key '23', which is not among the
# sensors selected via columns [0,1,3] if the column order matches the data
# description — confirm it is ignored by the plotting helper.
# NOTE(review): "Y_pred" in the comment below presumably means Y_pre — confirm.
plot_data_per_setting_and_sensor(dfs={'pressure with no leak':df_noleaks,
                                      'pressure with leak':df_leaks}, # same as using Y_pred
                                      #'predicted pressure':...},
                                 df_information=df_information,
                                 sensor_ids=sensor_ids,
                                 node_ids=['2','5','16'],
                                 diameters=[10],
                                 setting_ids=None,
                                 thresholds={'3':0.5, '10':2, '23':0.2, '25':0.5},
                                 #threshold_key='pressure with leak',
                                 leak_key='pressure with leak',
                                 time_puffer=100,
                                 show_legend=True,
                                 zoom_leak=True,
                                 print_report=False)

## Regression - Virtual Sensors

In *df_information* we find that the first leak appears at time ID 1541. Therefore, we use the preprocessed data up to time ID 1540 to train the virtual sensors on non-leaky data. We use K-fold cross-validation to evaluate the virtual sensors; as scores, we use the mean $R^2$ score and the mean RMSE over all folds *and* all sensors (as we receive a score per sensor).

In [None]:
# Last time ID before the setting with index label 1 starts
# (assumed to be the first leak setting — TODO confirm).
time_end_noleaks = df_information.loc[1,'setting start ID'] - 1
print('Time ID before first leak starts:', time_end_noleaks)
# Non-leaky training data; .loc slices are label-based and include both ends.
X_reg_train = X_pre.loc[time_start:time_end_noleaks,:]
Y_reg_train = Y_pre.loc[time_start:time_end_noleaks,:]

In [None]:
#X_reg_train

In [None]:
#Y_reg_train

In [None]:
# ----- evaluation by cross validation
# Estimates how well the virtual sensors fit non-leaky data: 8 unshuffled folds;
# per fold a fresh MultiRegression(LinearRegression) is fitted and scored on
# its training part and on its held-out part.

# load dataset
X = X_reg_train
Y = Y_reg_train

# --- perform evaluation by cross validation
train_r2s = list()
test_r2s = list()
train_rmses = list()
test_rmses = list()
# instantiate cross validation object
cv = KFold(n_splits=8, shuffle=False)
for train_index, test_index in cv.split(X):
    # KFold yields positional row numbers, so rows are selected by position
    # (iloc); this does not rely on the index starting at time_start without gaps
    X_train = X.iloc[train_index,:]
    Y_train = Y.iloc[train_index,:]
    X_test = X.iloc[test_index,:]
    Y_test = Y.iloc[test_index,:]

    # --- perform evaluation by training and testing
    # instantiate model
    regressor = LinearRegression
    model = MultiRegression(regressor)
    # train model
    model.fit(X_train, Y_train)
    # test model: score each part once; entry 0 is stored as r2, entry 1 as rmse
    train_scores = model.score(X_train, Y_train)
    test_scores = model.score(X_test, Y_test)
    train_r2s.append(train_scores[0])
    test_r2s.append(test_scores[0])
    train_rmses.append(train_scores[1])
    test_rmses.append(test_scores[1])

# --- access results
print('Mean and variance of training r2 scores over all folds: '\
      '{0:0.5f} pm {1:0.5f}'.format(np.array(train_r2s).mean(),
                                    np.array(train_r2s).var())) 
print('Mean and variance of test r2 scores over all folds: '\
      '{0:0.5f} pm {1:0.5f}'.format(np.array(test_r2s).mean(),
                                    np.array(test_r2s).var())) 
print('Train scores:\n', train_r2s)
print('Test scores:\n', test_r2s)
print('Mean and variance of training rmse over all folds: '\
      '{0:0.5f} pm {1:0.5f}'.format(np.array(train_rmses).mean(),
                                    np.array(train_rmses).var())) 
# label fixed: this line reports the test RMSE, not the r2 score
print('Mean and variance of test rmse over all folds: '\
      '{0:0.5f} pm {1:0.5f}'.format(np.array(test_rmses).mean(),
                                    np.array(test_rmses).var())) 
print('Train scores:\n', train_rmses)
print('Test scores:\n', test_rmses)

In [None]:
# refit regression model on whole non-leaky data
# (this fitted model_reg is the one used afterwards to predict pressures)
regressor = LinearRegression
model_reg = MultiRegression(regressor)
model_reg.fit(X_reg_train, Y_reg_train)

## Preprocessing for Classification - Compute Residuals

In [None]:
# Everything from the start of the setting with index label 1 onwards is
# treated as data the regression model has not been trained on.
time_start_leaks = df_information.loc[1,'setting start ID']
print('Time ID where first leak starts:', time_start_leaks)
X_reg_test = X_pre.loc[time_start_leaks:,:]
Y_reg_test = Y_pre.loc[time_start_leaks:,:]

# apply regression model on whole (not yet seen) data
Y_reg_pred =  model_reg.predict(X_reg_test)

# compute residuals based on true data and predicted data
# (absolute differences; these are the classifier inputs)
X_clas = (Y_reg_test - Y_reg_pred).abs()

# sensitive features and overall leak label for the same time range
X_sen = df_leaks.loc[time_start_leaks:,sensitive_features]
y_clas = df_leaks.loc[time_start_leaks:,['y']]

In [None]:
#X_reg_test

In [None]:
#Y_reg_test

In [None]:
#Y_reg_pred

In [None]:
#X_clas

In [None]:
#X_sen

In [None]:
#y_clas

## Visualization - True and Predicted Pressure

In [None]:
# Compare leak-free, measured and predicted pressure for time IDs 128000-129000.
plot_data_per_timeindex_and_sensor(dfs={'pressure with no leak':df_noleaks,
                                        'pressure with potentially leak':Y_reg_test, # part of df_leaks
                                        'predicted pressure':Y_reg_pred},
                                   sensor_ids=sensor_ids,
                                   start_ids=[128000],
                                   end_ids=[129000],
                                   #thresholds={'3':0.5, '10':2, '23':0.2, '25':0.5},
                                   #threshold_key='pressure with potentially leak',
                                   show_legend=True)

In [None]:
# Compare measured and predicted pressure at sensors '3' and '10' for the
# leak setting at node '25' with diameter 15, with fixed thresholds attached
# to the 'pressure with leak' series.
plot_data_per_setting_and_sensor(dfs={#'pressure with no leak':df_noleaks,
                                      'pressure with leak':Y_reg_test, # part of df_leaks
                                      'predicted pressure':Y_reg_pred},
                                 df_information=df_information,
                                 sensor_ids=['3', '10'],
                                 node_ids=[str(25)],
                                 diameters=[15],
                                 setting_ids=None,
                                 thresholds={'3':0.69, '10':1.80, '25':1.87},
                                 threshold_key='pressure with leak',
                                 leak_key='pressure with leak',
                                 time_puffer=50,
                                 show_legend=True,
                                 zoom_leak=True,
                                 print_report=False)

In [None]:
# plot pressure *residuals*
# (X_clas, for the diameter-10 leak settings at nodes '2' to '32')
plot_data_per_setting(X_clas,
                      df_information=df_information,
                      sensor_ids=sensor_ids,
                      node_ids=[str(x) for x in range(2,33)],
                      diameters=[10],
                      setting_ids=None,
                      #thresholds={'3':0.5, '10':2, '23':0.2, '25':0.5},
                      time_puffer=100,
                      show_legend=True,
                      zoom_leak=True,
                      print_report=False)

## Classification - Leak Detector(s)

We now use the virtual sensors to predict the pressure even for times where a leak is active in the WDN. We make use of the residuals $|p_j(t_i) - f_j^r(p_{\neq j}(t_i))| \in \mathbb{R}$ to define a threshold-based classifier that predicts whether a leak is active (1) or not (0) at time $t_i$. 

In [None]:
# Containers for all results (filled per diameter below,
# visualized at the end).
results_fairness = {}
results_nofairness = {}

# Maps each fairness-enhancing method (key) to the plain method (value)
# that it is supposed to improve on.
comparisons = {
    'TFPR+COV-db-log': 'TFPR-db',
    'TFPR+COV-ndb-log': 'TFPR-ndb',
    'TFPR+COV-ndb-max': 'TFPR-ndb',
    'ACC+COV-db-log': 'ACC-db',
    'ACC+COV-ndb-log': 'ACC-ndb',
    'ACC+COV-ndb-max': 'ACC-ndb',
    'COV+ACC-ndb-log': 'ACC-ndb',
    'COV+ACC-ndb-max': 'ACC-ndb',
    'EO+ACC-ndb-log': 'ACC-ndb',
    'EO+ACC-ndb-max': 'ACC-ndb',
    # this is different to the Springer-version
    'DI+ACC-ndb-log': 'ACC-ndb',
    'DI+ACC-ndb-max': 'ACC-ndb',
}

### Diameter = 5

In [None]:
# Restrict training and test data to the leak settings of one diameter.
diameter = 5
results = filter_diameter_Hanoi(X_clas, y_clas, X_sen,
                                diameter=diameter,
                                df_information=df_information)
(X_clas_train, X_clas_test,
 y_clas_train, y_clas_test,
 X_sen_train, X_sen_test) = results
# Column sums of the sensitive features in the training and the test part.
print(X_sen_train.sum())
print(X_sen_test.sum())

# Per-diameter result containers (visualized at the end).
results_d5 = {}
results_fairness[diameter] = {}
results_nofairness[diameter] = {}

#### Method: Choose hyperparameter

In [None]:
# Baseline classifier: fitted on the training residuals only (no labels or
# sensitive features are passed), with `factor` as its hyperparameter.
model_clas = ETC_hyperparameter()
model_clas.fit(X_clas_train, 
               factor=0.15, 
               print_coeff=True)
# evaluate() comes from the pipeline notebook; its four return values are
# stored as acc, eo, di and TPRs (TPRs is used as a dict further below).
acc,eo,di,TPRs = evaluate(model_clas)
# define the starting point for all other algorithms
start_thresholds = model_clas.thresholds

In [None]:
# Largest and smallest entry of the TPRs dict.
tpr_values = list(TPRs.values())
max_TPR = max(tpr_values)
min_TPR = min(tpr_values)
# Print the scores, rounded to 4 digits, as one ' & '-separated row
# (presumably for pasting into a LaTeX table).
row_values = [acc,
              max_TPR,
              min_TPR,
              di,
              eo,
              (1-eo/max_TPR),
              (1-di)*max_TPR]
print(' & '.join('{}'.format(round(value, 4)) for value in row_values))

In [None]:
# Full evaluation, keyed by the model object itself.
results_d5[model_clas] = dict(acc=acc, eo=eo, di=di, TPRs=TPRs)
# Scalar scores under the model's alias; the fairness sweeps read their
# reference 'di' value from this dictionary.
results_nofairness[diameter][model_clas.alias] = {'acc': acc, 'eo': eo, 'di': di}

#### Method: Optimize TPR - FPR  (db.)

In [None]:
# 'TFPR-db' model (section: optimize TPR - FPR, db.), started from the
# baseline thresholds. b_sigmoid and sum_threshold are stored in notebook
# variables because later predict_approx calls reuse them.
b_sigmoid = 100
sum_threshold = 0.8
model_clas = ETC_optimizeFTPR_db(alias='TFPR-db')
model_clas.fit(X_clas_train, 
               X_sen_train,
               y_clas_train,
               start_thresholds=start_thresholds,
               b_sigmoid=b_sigmoid, 
               sum_threshold=sum_threshold,
               print_coeff=True)
acc,eo,di,TPRs = evaluate(model_clas)

In [None]:
# Full evaluation, keyed by the model object itself.
results_d5[model_clas] = dict(acc=acc, eo=eo, di=di, TPRs=TPRs)
# Scalar scores under the model's alias; the fairness sweeps read their
# reference 'di' value from this dictionary.
results_nofairness[diameter][model_clas.alias] = {'acc': acc, 'eo': eo, 'di': di}

In [None]:
# Approximate predictions of the current model on the training residuals
# (second return value of predict_approx).
_, y_pred_approx = model_clas.predict_approx(
    X_clas_train,
    b_sigmoid=b_sigmoid,
    sum_threshold=sum_threshold,
)

# Print Cov(sensitive feature, approximate prediction) for every sensitive feature.
sensitive_features = list(X_sen_train.columns)
for sensitive_feature, x_sen in X_sen_train.items():
    print('Sensitive feature:', sensitive_feature)
    print(Cov(x_sen, y_pred_approx.loc[:, 'y']))

#### Method: Optimize TPR - FPR while enhancing fairness (db.)

In [None]:
# Single fit of the 'TFPR+COV-db-log' model (section: optimize TPR - FPR while
# enhancing fairness, db.) for one (c, mu) pair; the trailing comments list
# other values that were tried.
b_sigmoid = 100
sum_threshold = 0.8
model_clas = ETC_optimizeFTPR_F_db(alias='TFPR+COV-db-log')
model_clas.fit(X_clas_train,
               X_sen_train,
               y_clas_train,
               start_thresholds=start_thresholds,
               b_sigmoid=b_sigmoid, 
               sum_threshold=sum_threshold,
               c=0.05, #0.045, 0.05!, 0.06, 0.07, 0.08
               mu=0.1, #0.1
               print_coeff=True)
acc,eo,di,TPRs = evaluate(model_clas)

In [None]:
# --- hyperparameters and model class of this sweep
start_hyper = 0.05
extra_hypers = [0.045]
b_sigmoid = 100
sum_threshold = 0.8
model_clas = ETC_optimizeFTPR_F_db(alias='TFPR+COV-db-log')

# --- reference value for the stopping criterion: 'di' of the plain method
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
print('Disparate impact of approx. {} from {} algorithm is used as a stopping criterium.'
      .format(round(di_nofairness, 5), comparison_algo))

# ----- candidate values of c: the extra values first, then 40 steps of 0.01
Cs = extra_hypers + [round(start_hyper + 0.01*step, 2) for step in range(40)]
print('\nHyperparameters to test:\n', Cs)
sweep_results = {'ACCs': [], 'EOs': [], 'DIs': [], 'Cs': []}
results_fairness[diameter][model_clas.alias] = sweep_results
for c in Cs:
    print('\nc:', c)
    # --- train a fresh model for this value of c
    model_clas = ETC_optimizeFTPR_F_db(alias='TFPR+COV-db-log')
    model_clas.fit(X_clas_train,
                   X_sen_train,
                   y_clas_train,
                   start_thresholds=start_thresholds,
                   b_sigmoid=b_sigmoid,
                   sum_threshold=sum_threshold,
                   c=c,
                   mu=0.1,
                   print_coeff=False)
    # --- evaluate it
    acc, eo, di, TPRs = evaluate(model_clas)
    # --- stop as soon as di is no larger than that of the comparison model
    if di <= di_nofairness:
        print('\nHyperparameter {} and larger were not used'.format(c))
        break
    results_d5[model_clas] = {'acc': acc, 'eo': eo, 'di': di, 'TPRs': TPRs}
    sweep_results['ACCs'].append(acc)
    sweep_results['EOs'].append(eo)
    sweep_results['DIs'].append(di)
    sweep_results['Cs'].append(c)

#### Method: Optimize TPR - FPR  (ndb.)

In [None]:
# 'TFPR-ndb' model (section: optimize TPR - FPR, ndb.), started from the
# baseline thresholds; unlike the db. variant, no b_sigmoid / sum_threshold
# are passed to fit.
model_clas = ETC_optimizeFTPR_ndb(alias='TFPR-ndb')
model_clas.fit(X_clas_train, 
               X_sen_train,
               y_clas_train,
               start_thresholds=start_thresholds,
               print_coeff=True)
acc,eo,di,TPRs = evaluate(model_clas)

In [None]:
# Full evaluation, keyed by the model object itself.
results_d5[model_clas] = dict(acc=acc, eo=eo, di=di, TPRs=TPRs)
# Scalar scores under the model's alias; the fairness sweeps read their
# reference 'di' value from this dictionary.
results_nofairness[diameter][model_clas.alias] = {'acc': acc, 'eo': eo, 'di': di}

In [None]:
# Approximate predictions of the current model on the training residuals
# (second return value of predict_approx). b_sigmoid and sum_threshold are
# the notebook variables set in an earlier cell.
_, y_pred_approx = model_clas.predict_approx(
    X_clas_train,
    b_sigmoid=b_sigmoid,
    sum_threshold=sum_threshold,
)

# Print Cov(sensitive feature, approximate prediction) for every sensitive feature.
sensitive_features = list(X_sen_train.columns)
for sensitive_feature, x_sen in X_sen_train.items():
    print('Sensitive feature:', sensitive_feature)
    print(Cov(x_sen, y_pred_approx.loc[:, 'y']))

#### Method: Optimize TPR - FPR while enhancing fairness (ndb., log-barrier)

In [None]:
# Single fit of the 'TFPR+COV-ndb-log' model (fairness-enhanced, ndb.,
# barrier='log') for one (c, mu) pair; the trailing comments list other
# values that were tried.
model_clas = ETC_optimizeFTPR_F_ndb(alias='TFPR+COV-ndb-log')
model_clas.fit(X_clas_train, 
               X_sen_train,
               y_clas_train,
               start_thresholds=start_thresholds,
               c=0.05, #0.044, 0.05!, 0.06, 0.07
               mu=0.2, #0.2
               barrier='log',
               print_coeff=True)
acc,eo,di,TPRs = evaluate(model_clas)

In [None]:
# --- hyperparameters and model class of this sweep
start_hyper = 0.05
extra_hypers = [0.044]
model_clas = ETC_optimizeFTPR_F_ndb(alias='TFPR+COV-ndb-log')

# --- reference value for the stopping criterion: 'di' of the plain method
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
print('Disparate impact of approx. {} from {} algorithm is used as a stopping criterium.'
      .format(round(di_nofairness, 5), comparison_algo))

# ----- candidate values of c: the extra values first, then 40 steps of 0.01
Cs = extra_hypers + [round(start_hyper + 0.01*step, 2) for step in range(40)]
print('\nHyperparameters to test:\n', Cs)
sweep_results = {'ACCs': [], 'EOs': [], 'DIs': [], 'Cs': []}
results_fairness[diameter][model_clas.alias] = sweep_results
for c in Cs:
    print('\nc:', c)
    # --- train a fresh model for this value of c
    model_clas = ETC_optimizeFTPR_F_ndb(alias='TFPR+COV-ndb-log')
    model_clas.fit(X_clas_train,
                   X_sen_train,
                   y_clas_train,
                   start_thresholds=start_thresholds,
                   c=c,
                   mu=0.2,
                   barrier='log',
                   print_coeff=False)
    # --- evaluate it
    acc, eo, di, TPRs = evaluate(model_clas)
    # --- stop as soon as di is no larger than that of the comparison model
    if di <= di_nofairness:
        print('\nHyperparameter {} and larger were not used'.format(c))
        break
    results_d5[model_clas] = {'acc': acc, 'eo': eo, 'di': di, 'TPRs': TPRs}
    sweep_results['ACCs'].append(acc)
    sweep_results['EOs'].append(eo)
    sweep_results['DIs'].append(di)
    sweep_results['Cs'].append(c)

#### Method: Optimize TPR - FPR while enhancing fairness (ndb., max-barrier)

In [None]:
# Single fit of the 'TFPR+COV-ndb-max' model (fairness-enhanced, ndb.,
# barrier='max') for one (c, mu) pair; the trailing comments list other
# values that were tried.
model_clas = ETC_optimizeFTPR_F_ndb(alias='TFPR+COV-ndb-max')
model_clas.fit(X_clas_train, 
               X_sen_train,
               y_clas_train,
               start_thresholds=start_thresholds,
               c=0.03, #0.0, 0.01, 0.02, 0.03!
               mu=100, #100
               barrier='max',
               print_coeff=True)
acc,eo,di,TPRs = evaluate(model_clas)

In [None]:
# --- hyperparameters and model class of this sweep
start_hyper = 0.0
extra_hypers = []
model_clas = ETC_optimizeFTPR_F_ndb(alias='TFPR+COV-ndb-max')

# --- reference value for the stopping criterion: 'di' of the plain method
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
print('Disparate impact of approx. {} from {} algorithm is used as a stopping criterium.'
      .format(round(di_nofairness, 5), comparison_algo))

# ----- candidate values of c: the extra values first, then 40 steps of 0.01
Cs = extra_hypers + [round(start_hyper + 0.01*step, 2) for step in range(40)]
print('\nHyperparameters to test:\n', Cs)
sweep_results = {'ACCs': [], 'EOs': [], 'DIs': [], 'Cs': []}
results_fairness[diameter][model_clas.alias] = sweep_results
for c in Cs:
    print('\nc:', c)
    # --- train a fresh model for this value of c
    model_clas = ETC_optimizeFTPR_F_ndb(alias='TFPR+COV-ndb-max')
    model_clas.fit(X_clas_train,
                   X_sen_train,
                   y_clas_train,
                   start_thresholds=start_thresholds,
                   c=c,
                   mu=100,
                   barrier='max',
                   print_coeff=False)
    # --- evaluate it
    acc, eo, di, TPRs = evaluate(model_clas)
    # --- stop as soon as di is no larger than that of the comparison model
    if di <= di_nofairness:
        print('\nHyperparameter {} and larger were not used'.format(c))
        break
    results_d5[model_clas] = {'acc': acc, 'eo': eo, 'di': di, 'TPRs': TPRs}
    sweep_results['ACCs'].append(acc)
    sweep_results['EOs'].append(eo)
    sweep_results['DIs'].append(di)
    sweep_results['Cs'].append(c)

#### Method: Optimize ACC (db.)

In [None]:
# 'ACC-db' model (section: optimize ACC, db.), started from the baseline
# thresholds, with the same b_sigmoid / sum_threshold as the other db. models.
b_sigmoid = 100
sum_threshold = 0.8
model_clas = ETC_optimizeACC_db(alias='ACC-db')
model_clas.fit(X_clas_train,
               X_sen_train,
               y_clas_train,
               start_thresholds=start_thresholds,
               b_sigmoid=b_sigmoid, 
               sum_threshold=sum_threshold,
               print_coeff=True)
acc,eo,di,TPRs = evaluate(model_clas)

In [None]:
# Full evaluation, keyed by the model object itself.
results_d5[model_clas] = dict(acc=acc, eo=eo, di=di, TPRs=TPRs)
# Scalar scores under the model's alias; the fairness sweeps read their
# reference 'di' value from this dictionary.
results_nofairness[diameter][model_clas.alias] = {'acc': acc, 'eo': eo, 'di': di}

In [None]:
# score() returns seven values here; only the sixth is kept, as the training
# score of the ACC-db model (presumably its accuracy — confirm against the
# pipeline notebook).
_,_,_,_,_,acc_best_db,_ = model_clas.score(X_clas_train, 
                                        y_clas_train,
                                        print_all_scores=False)
acc_best_db

In [None]:
# Approximate predictions of the current model on the training residuals
# (second return value of predict_approx).
_, y_pred_approx = model_clas.predict_approx(
    X_clas_train,
    b_sigmoid=b_sigmoid,
    sum_threshold=sum_threshold,
)

# Print Cov(sensitive feature, approximate prediction) for every sensitive feature.
sensitive_features = list(X_sen_train.columns)
for sensitive_feature, x_sen in X_sen_train.items():
    print('Sensitive feature:', sensitive_feature)
    print(Cov(x_sen, y_pred_approx.loc[:, 'y']))

#### Method: Optimize ACC while enhancing fairness (db.)

In [None]:
# Single fit of the 'ACC+COV-db-log' model (section: optimize ACC while
# enhancing fairness, db.) for one (c, mu) pair; the trailing comments list
# other values that were tried.
b_sigmoid = 100
sum_threshold = 0.8
model_clas = ETC_optimizeACC_F_db(alias='ACC+COV-db-log')
model_clas.fit(X_clas_train,
               X_sen_train,
               y_clas_train,
               start_thresholds=start_thresholds,
               b_sigmoid=b_sigmoid, 
               sum_threshold=sum_threshold,
               c=0.07, #0.05, 0.055, 0.06, 0.07!, 0.08, ..., 0.17
               mu=0.15, #0.15
               print_coeff=True)
acc,eo,di,TPRs = evaluate(model_clas)

In [None]:
# --- hyperparameters and model class of this sweep
start_hyper = 0.06
extra_hypers = [0.05, 0.055]
b_sigmoid = 100
sum_threshold = 0.8
model_clas = ETC_optimizeACC_F_db(alias='ACC+COV-db-log')

# --- reference value for the stopping criterion: 'di' of the plain method
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
print('disparate impact of approx. {} from {} algorithm is used as a stopping criterium.'
      .format(round(di_nofairness, 5), comparison_algo))

# ----- candidate values of c: the extra values first, then 40 steps of 0.01
Cs = extra_hypers + [round(start_hyper + 0.01*step, 2) for step in range(40)]
print('\nHyperparameters to test:\n', Cs)
sweep_results = {'ACCs': [], 'EOs': [], 'DIs': [], 'Cs': []}
results_fairness[diameter][model_clas.alias] = sweep_results
for c in Cs:
    print('\nc:', c)
    # --- train a fresh model for this value of c
    model_clas = ETC_optimizeACC_F_db(alias='ACC+COV-db-log')
    model_clas.fit(X_clas_train,
                   X_sen_train,
                   y_clas_train,
                   start_thresholds=start_thresholds,
                   b_sigmoid=b_sigmoid,
                   sum_threshold=sum_threshold,
                   c=c,
                   mu=0.15,
                   print_coeff=False)
    # --- evaluate it
    acc, eo, di, TPRs = evaluate(model_clas)
    # --- stop as soon as di is no larger than that of the comparison model
    if di <= di_nofairness:
        print('\nHyperparameter {} and larger were not used'.format(c))
        break
    results_d5[model_clas] = {'acc': acc, 'eo': eo, 'di': di, 'TPRs': TPRs}
    sweep_results['ACCs'].append(acc)
    sweep_results['EOs'].append(eo)
    sweep_results['DIs'].append(di)
    sweep_results['Cs'].append(c)

#### Method: Optimize ACC (ndb.)

In [None]:
# 'ACC-ndb' model (section: optimize ACC, ndb.), started from the baseline
# thresholds; no b_sigmoid / sum_threshold are passed to fit.
model_clas = ETC_optimizeACC_ndb(alias='ACC-ndb')
model_clas.fit(X_clas_train, 
               X_sen_train,
               y_clas_train,
               start_thresholds=start_thresholds,
               print_coeff=True)
acc,eo,di,TPRs = evaluate(model_clas)

In [None]:
# Full evaluation, keyed by the model object itself.
results_d5[model_clas] = dict(acc=acc, eo=eo, di=di, TPRs=TPRs)
# Scalar scores under the model's alias; the fairness sweeps read their
# reference 'di' value from this dictionary.
results_nofairness[diameter][model_clas.alias] = {'acc': acc, 'eo': eo, 'di': di}

In [None]:
# score() returns seven values here; only the sixth is kept, as the training
# score of the ACC-ndb model (presumably its accuracy — confirm against the
# pipeline notebook).
_,_,_,_,_,acc_best_ndb,_ = model_clas.score(X_clas_train, 
                                         y_clas_train,
                                         print_all_scores=False)
acc_best_ndb

In [None]:
# Approximate predictions of the current model on the training residuals
# (second return value of predict_approx). b_sigmoid and sum_threshold are
# the notebook variables set in an earlier cell.
_, y_pred_approx = model_clas.predict_approx(
    X_clas_train,
    b_sigmoid=b_sigmoid,
    sum_threshold=sum_threshold,
)

# Print Cov(sensitive feature, approximate prediction) for every sensitive feature.
sensitive_features = list(X_sen_train.columns)
for sensitive_feature, x_sen in X_sen_train.items():
    print('Sensitive feature:', sensitive_feature)
    print(Cov(x_sen, y_pred_approx.loc[:, 'y']))

#### Method: Optimize ACC while enhancing fairness (ndb., log-barrier)

In [None]:
# Single fit of the 'ACC+COV-ndb-log' model (fairness-enhanced, ndb.,
# barrier='log') for one (c, mu) pair; the trailing comments list other
# values that were tried.
model_clas = ETC_optimizeACC_F_ndb(alias='ACC+COV-ndb-log')
model_clas.fit(X_clas_train, 
               X_sen_train,
               y_clas_train,
               start_thresholds=start_thresholds,
               c=0.07, #0.045, 0.05, 0.06, 0.07!
               mu=0.2, #0.2
               barrier='log',
               print_coeff=True)
acc,eo,di,TPRs = evaluate(model_clas)

In [None]:
# --- hyperparameters and model class of this sweep
start_hyper = 0.05
extra_hypers = [0.045]
model_clas = ETC_optimizeACC_F_ndb(alias='ACC+COV-ndb-log')

# --- reference value for the stopping criterion: 'di' of the plain method
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
print('disparate impact of approx. {} from {} algorithm is used as a stopping criterium.'
      .format(round(di_nofairness, 5), comparison_algo))

# ----- candidate values of c: the extra values first, then 40 steps of 0.01
Cs = extra_hypers + [round(start_hyper + 0.01*step, 2) for step in range(40)]
print('\nHyperparameters to test:\n', Cs)
sweep_results = {'ACCs': [], 'EOs': [], 'DIs': [], 'Cs': []}
results_fairness[diameter][model_clas.alias] = sweep_results
for c in Cs:
    print('\nc:', c)
    # --- train a fresh model for this value of c
    model_clas = ETC_optimizeACC_F_ndb(alias='ACC+COV-ndb-log')
    model_clas.fit(X_clas_train,
                   X_sen_train,
                   y_clas_train,
                   start_thresholds=start_thresholds,
                   c=c,
                   mu=0.2,
                   barrier='log',
                   print_coeff=False)
    # --- evaluate it
    acc, eo, di, TPRs = evaluate(model_clas)
    # --- stop as soon as di is no larger than that of the comparison model
    if di <= di_nofairness:
        print('\nHyperparameter {} and larger were not used'.format(c))
        break
    results_d5[model_clas] = {'acc': acc, 'eo': eo, 'di': di, 'TPRs': TPRs}
    sweep_results['ACCs'].append(acc)
    sweep_results['EOs'].append(eo)
    sweep_results['DIs'].append(di)
    sweep_results['Cs'].append(c)

#### Method: Optimize ACC while enhancing fairness (ndb., max-barrier)

In [None]:
# Single run of the ACC+COV model with the max barrier, followed by evaluation.
model_clas = ETC_optimizeACC_F_ndb(alias='ACC+COV-ndb-max')
model_clas.fit(
    X_clas_train,
    X_sen_train,
    y_clas_train,
    start_thresholds=start_thresholds,
    c=0.02,    # values noted by the author: 0.0, 0.01, 0.02 (used here), 0.03
    mu=100,
    barrier='max',
    print_coeff=True,
)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# --- hyperparameter grid definition and model class
start_hyper = 0.0
extra_hypers = []
model_clas = ETC_optimizeACC_F_ndb(alias='ACC+COV-ndb-max')

# --- stopping constant: disparate impact of the comparison algorithm
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
print(('disparate impact of approx. {} '
       'from {} algorithm is used as a stopping criterium.').format(round(di_nofairness, 5),
                                                                    comparison_algo))

# --- values of c to test: the extras first, then start_hyper in steps of 0.01
Cs = extra_hypers + [round(start_hyper + i*0.01, 2) for i in range(40)]
print('\nHyperparameters to test:\n', Cs)
results_fairness[diameter][model_clas.alias] = {'ACCs': [], 'EOs': [], 'DIs': [], 'Cs': []}
for c in Cs:
    print('\nc:', c)
    # train a fresh model for this value of c
    model_clas = ETC_optimizeACC_F_ndb(alias='ACC+COV-ndb-max')
    model_clas.fit(
        X_clas_train,
        X_sen_train,
        y_clas_train,
        start_thresholds=start_thresholds,
        c=c,
        mu=100,
        barrier='max',
        print_coeff=False,
    )
    acc, eo, di, TPRs = evaluate(model_clas)
    # stop (without storing) once di is no larger than the comparison model's di
    if di <= di_nofairness:
        print('\nHyperparameter {} and larger were not used'.format(c))
        break
    results_d5[model_clas] = {'acc': acc, 'eo': eo, 'di': di, 'TPRs': TPRs}
    results_fairness[diameter][model_clas.alias]['ACCs'].append(acc)
    results_fairness[diameter][model_clas.alias]['EOs'].append(eo)
    results_fairness[diameter][model_clas.alias]['DIs'].append(di)
    results_fairness[diameter][model_clas.alias]['Cs'].append(c)

#### Method: Optimize COV (ndb., log-barrier)

In [None]:
# Display the value stored in acc_best_db (set by an earlier cell).
acc_best_db

In [None]:
# Display the value stored in acc_best_ndb (set by an earlier cell).
acc_best_ndb

In [None]:
# Choose acc_best_db as the reference value; later cells pass it to
# fit(..., acc_best=acc_best).
acc_best = acc_best_db

In [None]:
# Single run of the COV+ACC model with the log barrier, followed by evaluation.
model_clas = ETC_optimizeCOV_ndb(alias='COV+ACC-ndb-log')
model_clas.fit(
    X_clas_train,
    X_sen_train,
    y_clas_train,
    start_thresholds=start_thresholds,
    mu=0.01,
    lamb=0.13,    # values noted by the author: 0.08, ..., 0.21; 0.13 is used here
    barrier='log',
    acc_best=acc_best,
    print_coeff=True,
)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# --- first (largest) lambda of the sweep and model class
start_hyper = 0.21
model_clas = ETC_optimizeCOV_ndb(alias='COV+ACC-ndb-log')

# --- stopping constant: disparate impact of the comparison algorithm
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
print(('disparate impact of approx. {} '
       'from {} algorithm is used as a stopping criterium.').format(round(di_nofairness, 5),
                                                                    comparison_algo))

# --- lambdas to test, in DESCENDING order: start_hyper, start_hyper-0.01, ...
#     (only strictly positive values are kept); the list is named Cs as in
#     the other sweep cells
Cs = [lam for lam in (round(start_hyper - i*0.01, 2) for i in range(40)) if lam > 0]
print('\nHyperparameters to test:\n', Cs)
results_fairness[diameter][model_clas.alias] = {'ACCs': [], 'EOs': [], 'DIs': [], 'Lambdas': []}
for lamb in Cs:
    print('\nlambda:', lamb)
    # train a fresh model for this lambda
    model_clas = ETC_optimizeCOV_ndb(alias='COV+ACC-ndb-log')
    model_clas.fit(
        X_clas_train,
        X_sen_train,
        y_clas_train,
        start_thresholds=start_thresholds,
        mu=0.01,
        lamb=lamb,
        barrier='log',
        acc_best=acc_best,
        print_coeff=False,
    )
    acc, eo, di, TPRs = evaluate(model_clas)
    # stop (without storing) once di is no larger than the comparison model's di
    if di <= di_nofairness:
        # The sweep runs from large to small lambda, so the values that are
        # skipped after the break are the current one and all SMALLER ones.
        print('\nHyperparameter {} and smaller were not used'.format(lamb))
        break
    results_d5[model_clas] = {'acc': acc, 'eo': eo, 'di': di, 'TPRs': TPRs}
    results_fairness[diameter][model_clas.alias]['ACCs'].append(acc)
    results_fairness[diameter][model_clas.alias]['EOs'].append(eo)
    results_fairness[diameter][model_clas.alias]['DIs'].append(di)
    results_fairness[diameter][model_clas.alias]['Lambdas'].append(lamb)

#### Method: Optimize COV (ndb., max-barrier)

In [None]:
# Single run of the COV+ACC model with the max barrier, followed by evaluation.
model_clas = ETC_optimizeCOV_ndb(alias='COV+ACC-ndb-max')
model_clas.fit(
    X_clas_train,
    X_sen_train,
    y_clas_train,
    start_thresholds=start_thresholds,
    mu=100,
    lamb=0.09,    # values noted by the author: 0.05, ..., 0.19; 0.09 is used here
    barrier='max',
    acc_best=acc_best,
    print_coeff=True,
)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# --- first (largest) lambda of the sweep and model class
start_hyper = 0.19
model_clas = ETC_optimizeCOV_ndb(alias='COV+ACC-ndb-max')

# --- stopping constant: disparate impact of the comparison algorithm
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
print(('disparate impact of approx. {} '
       'from {} algorithm is used as a stopping criterium.').format(round(di_nofairness, 5),
                                                                    comparison_algo))

# --- lambdas to test, in DESCENDING order: start_hyper, start_hyper-0.01, ...
#     (only non-negative values are kept); the list is named Cs as in the
#     other sweep cells
Cs = [lam for lam in (round(start_hyper - i*0.01, 2) for i in range(40)) if lam >= 0]
print('\nHyperparameters to test:\n', Cs)
results_fairness[diameter][model_clas.alias] = {'ACCs': [], 'EOs': [], 'DIs': [], 'Lambdas': []}
for lamb in Cs:
    print('\nlambda:', lamb)
    # train a fresh model for this lambda
    model_clas = ETC_optimizeCOV_ndb(alias='COV+ACC-ndb-max')
    model_clas.fit(
        X_clas_train,
        X_sen_train,
        y_clas_train,
        start_thresholds=start_thresholds,
        mu=100,
        lamb=lamb,
        barrier='max',
        acc_best=acc_best,
        print_coeff=False,
    )
    acc, eo, di, TPRs = evaluate(model_clas)
    # stop (without storing) once di is no larger than the comparison model's di
    if di <= di_nofairness:
        # The sweep runs from large to small lambda, so the values that are
        # skipped after the break are the current one and all SMALLER ones.
        print('\nHyperparameter {} and smaller were not used'.format(lamb))
        break
    results_d5[model_clas] = {'acc': acc, 'eo': eo, 'di': di, 'TPRs': TPRs}
    results_fairness[diameter][model_clas.alias]['ACCs'].append(acc)
    results_fairness[diameter][model_clas.alias]['EOs'].append(eo)
    results_fairness[diameter][model_clas.alias]['DIs'].append(di)
    results_fairness[diameter][model_clas.alias]['Lambdas'].append(lamb)

#### Method: Optimize DI (ndb., log-barrier)

In [None]:
# Single run of the DI+ACC model with the log barrier, followed by evaluation.
model_clas = ETC_optimizeDI_ndb(alias='DI+ACC-ndb-log')
model_clas.fit(
    X_clas_train,
    X_sen_train,
    y_clas_train,
    start_thresholds=start_thresholds,
    mu=0.05,
    lamb=0.04,    # values noted by the author: 0.01, ..., 0.2; 0.04 is used here
    barrier='log',
    acc_best=acc_best,
    print_coeff=True,
)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# --- first (largest) lambda of the sweep and model class
start_hyper = 0.2
model_clas = ETC_optimizeDI_ndb(alias='DI+ACC-ndb-log')

# --- stopping constant: disparate impact of the comparison algorithm
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
print(('disparate impact of approx. {} '
       'from {} algorithm is used as a stopping criterium.').format(round(di_nofairness, 5),
                                                                    comparison_algo))

# --- lambdas to test, in DESCENDING order: start_hyper, start_hyper-0.01, ...
#     (only strictly positive values are kept); the list is named Cs as in
#     the other sweep cells
Cs = [lam for lam in (round(start_hyper - i*0.01, 2) for i in range(40)) if lam > 0]
print('\nHyperparameters to test:\n', Cs)
results_fairness[diameter][model_clas.alias] = {'ACCs': [], 'EOs': [], 'DIs': [], 'Lambdas': []}
for lamb in Cs:
    print('\nlambda:', lamb)
    # train a fresh model for this lambda
    model_clas = ETC_optimizeDI_ndb(alias='DI+ACC-ndb-log')
    model_clas.fit(
        X_clas_train,
        X_sen_train,
        y_clas_train,
        start_thresholds=start_thresholds,
        mu=0.05,
        lamb=lamb,
        barrier='log',
        acc_best=acc_best,
        print_coeff=False,
    )
    acc, eo, di, TPRs = evaluate(model_clas)
    # stop (without storing) once di is no larger than the comparison model's di
    if di <= di_nofairness:
        # The sweep runs from large to small lambda, so the values that are
        # skipped after the break are the current one and all SMALLER ones.
        print('\nHyperparameter {} and smaller were not used'.format(lamb))
        break
    results_d5[model_clas] = {'acc': acc, 'eo': eo, 'di': di, 'TPRs': TPRs}
    results_fairness[diameter][model_clas.alias]['ACCs'].append(acc)
    results_fairness[diameter][model_clas.alias]['EOs'].append(eo)
    results_fairness[diameter][model_clas.alias]['DIs'].append(di)
    results_fairness[diameter][model_clas.alias]['Lambdas'].append(lamb)

#### Method: Optimize DI (ndb., max-barrier)

In [None]:
# Single run of the DI+ACC model with the max barrier, followed by evaluation.
model_clas = ETC_optimizeDI_ndb(alias='DI+ACC-ndb-max')
model_clas.fit(
    X_clas_train,
    X_sen_train,
    y_clas_train,
    start_thresholds=start_thresholds,
    mu=100,
    lamb=0.04,    # values noted by the author: 0.00, ..., 0.19; 0.04 is used here
    barrier='max',
    acc_best=acc_best,
    print_coeff=True,
)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# --- first (largest) lambda of the sweep and model class
start_hyper = 0.19
model_clas = ETC_optimizeDI_ndb(alias='DI+ACC-ndb-max')

# --- stopping constant: disparate impact of the comparison algorithm
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
print(('disparate impact of approx. {} '
       'from {} algorithm is used as a stopping criterium.').format(round(di_nofairness, 5),
                                                                    comparison_algo))

# --- lambdas to test, in DESCENDING order: start_hyper, start_hyper-0.01, ...
#     (only non-negative values are kept); the list is named Cs as in the
#     other sweep cells
Cs = [lam for lam in (round(start_hyper - i*0.01, 2) for i in range(40)) if lam >= 0]
print('\nHyperparameters to test:\n', Cs)
results_fairness[diameter][model_clas.alias] = {'ACCs': [], 'EOs': [], 'DIs': [], 'Lambdas': []}
for lamb in Cs:
    print('\nlambda:', lamb)
    # train a fresh model for this lambda
    model_clas = ETC_optimizeDI_ndb(alias='DI+ACC-ndb-max')
    model_clas.fit(
        X_clas_train,
        X_sen_train,
        y_clas_train,
        start_thresholds=start_thresholds,
        mu=100,
        lamb=lamb,
        barrier='max',
        acc_best=acc_best,
        print_coeff=False,
    )
    acc, eo, di, TPRs = evaluate(model_clas)
    # stop (without storing) once di is no larger than the comparison model's di
    if di <= di_nofairness:
        # The sweep runs from large to small lambda, so the values that are
        # skipped after the break are the current one and all SMALLER ones.
        print('\nHyperparameter {} and smaller were not used'.format(lamb))
        break
    results_d5[model_clas] = {'acc': acc, 'eo': eo, 'di': di, 'TPRs': TPRs}
    results_fairness[diameter][model_clas.alias]['ACCs'].append(acc)
    results_fairness[diameter][model_clas.alias]['EOs'].append(eo)
    results_fairness[diameter][model_clas.alias]['DIs'].append(di)
    results_fairness[diameter][model_clas.alias]['Lambdas'].append(lamb)

#### Method: Optimize EO (ndb., log-barrier)

In [None]:
# Single run of the EO+ACC model with the log barrier, followed by evaluation.
model_clas = ETC_optimizeEO_ndb(alias='EO+ACC-ndb-log')
model_clas.fit(
    X_clas_train,
    X_sen_train,
    y_clas_train,
    start_thresholds=start_thresholds,
    mu=0.05,
    lamb=0.04,    # values noted by the author: 0.01, ..., 0.21; 0.04 is used here
    barrier='log',
    acc_best=acc_best,
    print_coeff=True,
)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# --- first (largest) lambda of the sweep and model class
start_hyper = 0.21
model_clas = ETC_optimizeEO_ndb(alias='EO+ACC-ndb-log')

# --- stopping constant: equal opportunity of the comparison algorithm
comparison_algo = comparisons[model_clas.alias]
eo_nofairness = results_nofairness[diameter][comparison_algo]['eo']
print(('equal opportunity of approx. {} '
       'from {} algorithm is used as a stopping criterium.').format(round(eo_nofairness, 5),
                                                                    comparison_algo))

# --- lambdas to test, in DESCENDING order: start_hyper, start_hyper-0.01, ...
#     (only strictly positive values are kept); the list is named Cs as in
#     the other sweep cells
Cs = [lam for lam in (round(start_hyper - i*0.01, 2) for i in range(40)) if lam > 0]
print('\nHyperparameters to test:\n', Cs)
results_fairness[diameter][model_clas.alias] = {'ACCs': [], 'EOs': [], 'DIs': [], 'Lambdas': []}
for lamb in Cs:
    print('\nlambda:', lamb)
    # train a fresh model for this lambda
    model_clas = ETC_optimizeEO_ndb(alias='EO+ACC-ndb-log')
    model_clas.fit(
        X_clas_train,
        X_sen_train,
        y_clas_train,
        start_thresholds=start_thresholds,
        mu=0.05,
        lamb=lamb,
        barrier='log',
        acc_best=acc_best,
        print_coeff=False,
    )
    acc, eo, di, TPRs = evaluate(model_clas)
    # stop (without storing) once eo is no smaller than the comparison model's eo
    if eo >= eo_nofairness:
        # The sweep runs from large to small lambda, so the values that are
        # skipped after the break are the current one and all SMALLER ones.
        print('\nHyperparameter {} and smaller were not used'.format(lamb))
        break
    results_d5[model_clas] = {'acc': acc, 'eo': eo, 'di': di, 'TPRs': TPRs}
    results_fairness[diameter][model_clas.alias]['ACCs'].append(acc)
    results_fairness[diameter][model_clas.alias]['EOs'].append(eo)
    results_fairness[diameter][model_clas.alias]['DIs'].append(di)
    results_fairness[diameter][model_clas.alias]['Lambdas'].append(lamb)

#### Method: Optimize EO (ndb., max-barrier)

In [None]:
# Single run of the EO+ACC model with the max barrier, followed by evaluation.
model_clas = ETC_optimizeEO_ndb(alias='EO+ACC-ndb-max')
model_clas.fit(
    X_clas_train,
    X_sen_train,
    y_clas_train,
    start_thresholds=start_thresholds,
    mu=100,
    lamb=0.09,    # values noted by the author: 0.03, ..., 0.19; 0.09 is used here
    barrier='max',
    acc_best=acc_best,
    print_coeff=True,
)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# --- first (largest) lambda of the sweep and model class
start_hyper = 0.19
model_clas = ETC_optimizeEO_ndb(alias='EO+ACC-ndb-max')

# --- stopping constant: equal opportunity of the comparison algorithm
comparison_algo = comparisons[model_clas.alias]
eo_nofairness = results_nofairness[diameter][comparison_algo]['eo']
print(('equal opportunity of approx. {} '
       'from {} algorithm is used as a stopping criterium.').format(round(eo_nofairness, 5),
                                                                    comparison_algo))

# --- lambdas to test, in DESCENDING order: start_hyper, start_hyper-0.01, ...
#     (only non-negative values are kept); the list is named Cs as in the
#     other sweep cells
Cs = [lam for lam in (round(start_hyper - i*0.01, 2) for i in range(40)) if lam >= 0]
print('\nHyperparameters to test:\n', Cs)
results_fairness[diameter][model_clas.alias] = {'ACCs': [], 'EOs': [], 'DIs': [], 'Lambdas': []}
for lamb in Cs:
    print('\nlambda:', lamb)
    # train a fresh model for this lambda
    model_clas = ETC_optimizeEO_ndb(alias='EO+ACC-ndb-max')
    model_clas.fit(
        X_clas_train,
        X_sen_train,
        y_clas_train,
        start_thresholds=start_thresholds,
        mu=100,
        lamb=lamb,
        barrier='max',
        # Use the same reference value as the single-run cell for this model
        # and as every other sweep (previously acc_best_ndb was passed here).
        acc_best=acc_best,
        print_coeff=False,
    )
    acc, eo, di, TPRs = evaluate(model_clas)
    # stop (without storing) once eo is no smaller than the comparison model's eo
    if eo >= eo_nofairness:
        # The sweep runs from large to small lambda, so the values that are
        # skipped after the break are the current one and all SMALLER ones.
        print('\nHyperparameter {} and smaller were not used'.format(lamb))
        break
    results_d5[model_clas] = {'acc': acc, 'eo': eo, 'di': di, 'TPRs': TPRs}
    results_fairness[diameter][model_clas.alias]['ACCs'].append(acc)
    results_fairness[diameter][model_clas.alias]['EOs'].append(eo)
    results_fairness[diameter][model_clas.alias]['DIs'].append(di)
    results_fairness[diameter][model_clas.alias]['Lambdas'].append(lamb)

### Diameter = 10

In [None]:
# Filter the training and test data according to the chosen diameter.
diameter = 10
results = filter_diameter_Hanoi(
    X_clas,
    y_clas,
    X_sen,
    diameter=diameter,
    df_information=df_information,
)
(X_clas_train, X_clas_test,
 y_clas_train, y_clas_test,
 X_sen_train, X_sen_test) = results
print(X_sen_train.sum())
print(X_sen_test.sum())

# Fresh containers for all results of this diameter
# (they are visualized at the end).
results_d10 = {}
results_fairness[diameter] = {}
results_nofairness[diameter] = {}

#### Method: Choose hyperparameter

In [None]:
# Fit the hyperparameter-based baseline model and evaluate it.
model_clas = ETC_hyperparameter()
model_clas.fit(
    X_clas_train,
    factor=0.2,
    print_coeff=True,
)
acc, eo, di, TPRs = evaluate(model_clas)

# Its thresholds are the starting point for all other algorithms.
start_thresholds = model_clas.thresholds

In [None]:
# Largest and smallest value stored in TPRs.
max_TPR = max(TPRs.values())
min_TPR = min(TPRs.values())

# Print the metrics, rounded to 4 digits, as one '&'-separated row
# (presumably for pasting into a LaTeX table -- verify).
print('{} & {} & {} & {} & {} & {} & {}'.format(
    *(round(metric, 4) for metric in (acc,
                                      max_TPR,
                                      min_TPR,
                                      di,
                                      eo,
                                      (1-eo/max_TPR),
                                      (1-di)*max_TPR))))

In [None]:
# Keep the full evaluation per model object and the scalar metrics per alias.
results_d10[model_clas] = {'acc': acc, 'eo': eo, 'di': di, 'TPRs': TPRs}
results_nofairness[diameter][model_clas.alias] = {'acc': acc, 'eo': eo, 'di': di}

#### Method: Optimize TPR - FPR (db.)

In [None]:
# Settings passed to fit() here and to predict_approx() in later cells.
b_sigmoid = 100
sum_threshold = 0.8

# Fit the TFPR-db model from the baseline thresholds and evaluate it.
model_clas = ETC_optimizeFTPR_db(alias='TFPR-db')
model_clas.fit(
    X_clas_train,
    X_sen_train,
    y_clas_train,
    start_thresholds=start_thresholds,
    b_sigmoid=b_sigmoid,
    sum_threshold=sum_threshold,
    print_coeff=True,
)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# Keep the full evaluation per model object and the scalar metrics per alias.
results_d10[model_clas] = {'acc': acc, 'eo': eo, 'di': di, 'TPRs': TPRs}
results_nofairness[diameter][model_clas.alias] = {'acc': acc, 'eo': eo, 'di': di}

In [None]:
# Approximate predictions of the current model on the training split;
# only the second returned value is kept.
_, y_pred_approx = model_clas.predict_approx(
    X_clas_train,
    b_sigmoid=b_sigmoid,
    sum_threshold=sum_threshold,
)

# For every sensitive column, print Cov(column, approximate prediction 'y').
sensitive_features = list(X_sen_train.columns)
for sensitive_feature in sensitive_features:
    x_sen = X_sen_train.loc[:, sensitive_feature]
    print('Sensitive feature:', sensitive_feature)
    print(Cov(x_sen, y_pred_approx.loc[:, 'y']))

#### Method: Optimize TPR - FPR while enhancing fairness (db.)

In [None]:
# Settings passed to fit().
b_sigmoid = 100
sum_threshold = 0.8

# Single run of the TFPR+COV-db model, followed by evaluation.
model_clas = ETC_optimizeFTPR_F_db(alias='TFPR+COV-db-log')
model_clas.fit(
    X_clas_train,
    X_sen_train,
    y_clas_train,
    start_thresholds=start_thresholds,
    b_sigmoid=b_sigmoid,
    sum_threshold=sum_threshold,
    c=0.11,    # values noted by the author: 0.07, ..., 0.22; 0.11 is used here
    mu=0.2,
    print_coeff=True,
)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# --- hyperparameter grid definition, fit settings and model class
start_hyper = 0.07
extra_hypers = []
b_sigmoid = 100
sum_threshold = 0.8
model_clas = ETC_optimizeFTPR_F_db(alias='TFPR+COV-db-log')

# --- stopping constant: disparate impact of the comparison algorithm
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
print(('disparate impact of approx. {} '
       'from {} algorithm is used as a stopping criterium.').format(round(di_nofairness, 5),
                                                                    comparison_algo))

# --- values of c to test: the extras first, then start_hyper in steps of 0.01
Cs = extra_hypers + [round(start_hyper + i*0.01, 2) for i in range(40)]
print('\nHyperparameters to test:\n', Cs)
results_fairness[diameter][model_clas.alias] = {'ACCs': [], 'EOs': [], 'DIs': [], 'Cs': []}
for c in Cs:
    print('\nc:', c)
    # train a fresh model for this value of c
    model_clas = ETC_optimizeFTPR_F_db(alias='TFPR+COV-db-log')
    model_clas.fit(
        X_clas_train,
        X_sen_train,
        y_clas_train,
        start_thresholds=start_thresholds,
        b_sigmoid=b_sigmoid,
        sum_threshold=sum_threshold,
        c=c,
        mu=0.2,
        print_coeff=False,
    )
    acc, eo, di, TPRs = evaluate(model_clas)
    # stop (without storing) once di is no larger than the comparison model's di
    if di <= di_nofairness:
        print('\nHyperparameter {} and larger were not used'.format(c))
        break
    results_d10[model_clas] = {'acc': acc, 'eo': eo, 'di': di, 'TPRs': TPRs}
    results_fairness[diameter][model_clas.alias]['ACCs'].append(acc)
    results_fairness[diameter][model_clas.alias]['EOs'].append(eo)
    results_fairness[diameter][model_clas.alias]['DIs'].append(di)
    results_fairness[diameter][model_clas.alias]['Cs'].append(c)

#### Method: Optimize TPR - FPR  (ndb.)

In [None]:
# Fit the TFPR-ndb model from the baseline thresholds and evaluate it.
model_clas = ETC_optimizeFTPR_ndb(alias='TFPR-ndb')
model_clas.fit(
    X_clas_train,
    X_sen_train,
    y_clas_train,
    start_thresholds=start_thresholds,
    print_coeff=True,
)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# Keep the full evaluation per model object and the scalar metrics per alias.
results_d10[model_clas] = {'acc': acc, 'eo': eo, 'di': di, 'TPRs': TPRs}
results_nofairness[diameter][model_clas.alias] = {'acc': acc, 'eo': eo, 'di': di}

In [None]:
# Approximate predictions of the current model on the training split;
# only the second returned value is kept. b_sigmoid and sum_threshold are
# the values set in an earlier cell.
_, y_pred_approx = model_clas.predict_approx(
    X_clas_train,
    b_sigmoid=b_sigmoid,
    sum_threshold=sum_threshold,
)

# For every sensitive column, print Cov(column, approximate prediction 'y').
sensitive_features = list(X_sen_train.columns)
for sensitive_feature in sensitive_features:
    x_sen = X_sen_train.loc[:, sensitive_feature]
    print('Sensitive feature:', sensitive_feature)
    print(Cov(x_sen, y_pred_approx.loc[:, 'y']))

#### Method: Optimize TPR - FPR while enhancing fairness (ndb., log-barrier)

In [None]:
# Single run of the TFPR+COV model with the log barrier, followed by evaluation.
model_clas = ETC_optimizeFTPR_F_ndb(alias='TFPR+COV-ndb-log')
model_clas.fit(
    X_clas_train,
    X_sen_train,
    y_clas_train,
    start_thresholds=start_thresholds,
    c=0.11,    # values noted by the author: 0.07, ..., 0.33; 0.11 is used here
    mu=0.25,
    barrier='log',
    print_coeff=True,
)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# --- hyperparameter grid definition and model class
start_hyper = 0.07
extra_hypers = []
model_clas = ETC_optimizeFTPR_F_ndb(alias='TFPR+COV-ndb-log')

# --- stopping constant: disparate impact of the comparison algorithm
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
print(('Disparate impact of approx. {} '
       'from {} algorithm is used as a stopping criterium.').format(round(di_nofairness, 5),
                                                                    comparison_algo))

# --- values of c to test: the extras first, then start_hyper in steps of 0.01
Cs = extra_hypers + [round(start_hyper + i*0.01, 2) for i in range(40)]
print('\nHyperparameters to test:\n', Cs)
results_fairness[diameter][model_clas.alias] = {'ACCs': [], 'EOs': [], 'DIs': [], 'Cs': []}
for c in Cs:
    print('\nc:', c)
    # train a fresh model for this value of c
    model_clas = ETC_optimizeFTPR_F_ndb(alias='TFPR+COV-ndb-log')
    model_clas.fit(
        X_clas_train,
        X_sen_train,
        y_clas_train,
        start_thresholds=start_thresholds,
        c=c,
        mu=0.25,
        barrier='log',
        print_coeff=False,
    )
    acc, eo, di, TPRs = evaluate(model_clas)
    # stop (without storing) once di is no larger than the comparison model's di
    if di <= di_nofairness:
        print('\nHyperparameter {} and larger were not used'.format(c))
        break
    results_d10[model_clas] = {'acc': acc, 'eo': eo, 'di': di, 'TPRs': TPRs}
    results_fairness[diameter][model_clas.alias]['ACCs'].append(acc)
    results_fairness[diameter][model_clas.alias]['EOs'].append(eo)
    results_fairness[diameter][model_clas.alias]['DIs'].append(di)
    results_fairness[diameter][model_clas.alias]['Cs'].append(c)

#### Method: Optimize TPR - FPR while enhancing fairness (ndb., max-barrier)

In [None]:
# Single run of the TFPR+COV model with the max barrier, followed by evaluation.
model_clas = ETC_optimizeFTPR_F_ndb(alias='TFPR+COV-ndb-max')
model_clas.fit(
    X_clas_train,
    X_sen_train,
    y_clas_train,
    start_thresholds=start_thresholds,
    c=0.05,    # values noted by the author: 0.0, 0.01, ..., 0.07; 0.05 is used here
    mu=100,
    barrier='max',
    print_coeff=True,
)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# --- hyperparameter grid definition and model class
start_hyper = 0.0
extra_hypers = []
model_clas = ETC_optimizeFTPR_F_ndb(alias='TFPR+COV-ndb-max')

# --- stopping constant: disparate impact of the comparison algorithm
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
print(('Disparate impact of approx. {} '
       'from {} algorithm is used as a stopping criterium.').format(round(di_nofairness, 5),
                                                                    comparison_algo))

# --- values of c to test: the extras first, then start_hyper in steps of 0.01
Cs = extra_hypers + [round(start_hyper + i*0.01, 2) for i in range(40)]
print('\nHyperparameters to test:\n', Cs)
results_fairness[diameter][model_clas.alias] = {'ACCs': [], 'EOs': [], 'DIs': [], 'Cs': []}
for c in Cs:
    print('\nc:', c)
    # train a fresh model for this value of c
    model_clas = ETC_optimizeFTPR_F_ndb(alias='TFPR+COV-ndb-max')
    model_clas.fit(
        X_clas_train,
        X_sen_train,
        y_clas_train,
        start_thresholds=start_thresholds,
        c=c,
        mu=100,
        barrier='max',
        print_coeff=False,
    )
    acc, eo, di, TPRs = evaluate(model_clas)
    # stop (without storing) once di is no larger than the comparison model's di
    if di <= di_nofairness:
        print('\nHyperparameter {} and larger were not used'.format(c))
        break
    results_d10[model_clas] = {'acc': acc, 'eo': eo, 'di': di, 'TPRs': TPRs}
    results_fairness[diameter][model_clas.alias]['ACCs'].append(acc)
    results_fairness[diameter][model_clas.alias]['EOs'].append(eo)
    results_fairness[diameter][model_clas.alias]['DIs'].append(di)
    results_fairness[diameter][model_clas.alias]['Cs'].append(c)

#### Method: Optimize ACC (db.)

In [None]:
# Settings passed to fit() here and to predict_approx() in later cells.
b_sigmoid = 100
sum_threshold = 0.8

# Fit the ACC-db model from the baseline thresholds and evaluate it.
model_clas = ETC_optimizeACC_db(alias='ACC-db')
model_clas.fit(
    X_clas_train,
    X_sen_train,
    y_clas_train,
    start_thresholds=start_thresholds,
    b_sigmoid=b_sigmoid,
    sum_threshold=sum_threshold,
    print_coeff=True,
)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# Keep the full evaluation per model object and the scalar metrics per alias.
results_d10[model_clas] = {'acc': acc, 'eo': eo, 'di': di, 'TPRs': TPRs}
results_nofairness[diameter][model_clas.alias] = {'acc': acc, 'eo': eo, 'di': di}

In [None]:
# score() on the training data returns seven values; only the sixth is kept,
# as acc_best_db (presumably an accuracy -- confirm against score()).
# The bare name on the last line displays the value as the cell output.
_,_,_,_,_,acc_best_db,_ = model_clas.score(X_clas_train, 
                                        y_clas_train,
                                        print_all_scores=False)
acc_best_db

In [None]:
# Approximate predictions of the current model on the training data; the first
# return value of predict_approx is discarded.
_, y_pred_approx = model_clas.predict_approx(X_clas_train,
                                             b_sigmoid=b_sigmoid, 
                                             sum_threshold=sum_threshold)

# Print Cov(sensitive feature, approximate prediction column 'y') once per
# column of X_sen_train.
sensitive_features = list(X_sen_train.columns)
for sensitive_feature in sensitive_features:
    x_sen = X_sen_train.loc[:,sensitive_feature]
    print('Sensitive feature:', sensitive_feature)
    print(Cov(x_sen, y_pred_approx.loc[:,'y']))

#### Method: Optimize ACC while enhancing fairness (db.)

In [None]:
# Single fit of the 'ACC+COV-db-log' model at one hand-picked (c, mu) pair.
b_sigmoid, sum_threshold = 100, 0.8

model_clas = ETC_optimizeACC_F_db(alias='ACC+COV-db-log')
model_clas.fit(
    X_clas_train, X_sen_train, y_clas_train,
    start_thresholds=start_thresholds,
    b_sigmoid=b_sigmoid,
    sum_threshold=sum_threshold,
    c=0.09,   # values noted by the author: 0.065, 0.07, 0.08, 0.09!, 0.1, ..., 0.14
    mu=0.05,  # value noted by the author: 0.05
    print_coeff=True,
)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# --- sweep configuration
start_hyper = 0.07
extra_hypers = [0.065]
b_sigmoid, sum_threshold = 100, 0.8
model_clas = ETC_optimizeACC_F_db(alias='ACC+COV-db-log')

# --- stopping threshold: disparate impact of the comparison model
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
print(('disparate impact of approx. {} '
       'from {} algorithm is used as a stopping criterium.').format(
           round(di_nofairness, 5), comparison_algo))

# --- candidates: the extra values first, then 40 ascending steps of 0.01
Cs = extra_hypers + [round(start_hyper + i*0.01, 2) for i in range(40)]
print('\nHyperparameters to test:\n', Cs)
results_fairness[diameter][model_clas.alias] = {'ACCs': [], 'EOs': [], 'DIs': [], 'Cs': []}

for c in Cs:
    print('\nc:', c)
    # a new model instance is created for every candidate value
    model_clas = ETC_optimizeACC_F_db(alias='ACC+COV-db-log')
    model_clas.fit(
        X_clas_train, X_sen_train, y_clas_train,
        start_thresholds=start_thresholds,
        b_sigmoid=b_sigmoid,
        sum_threshold=sum_threshold,
        c=c,      # values noted by the author: 0.065, 0.07, 0.08, 0.09!, 0.1, ..., 0.14
        mu=0.05,  # value noted by the author: 0.05
        print_coeff=False,
    )
    acc, eo, di, TPRs = evaluate(model_clas)
    # stop at the first candidate whose DI does not exceed the comparison DI;
    # that run is not stored
    if di <= di_nofairness:
        print('\nHyperparameter {} and larger were not used'.format(c))
        break
    results_d10[model_clas] = dict(acc=acc, eo=eo, di=di, TPRs=TPRs)
    results_fairness[diameter][model_clas.alias]['ACCs'] += [acc]
    results_fairness[diameter][model_clas.alias]['EOs'] += [eo]
    results_fairness[diameter][model_clas.alias]['DIs'] += [di]
    results_fairness[diameter][model_clas.alias]['Cs'] += [c]

#### Method: Optimize ACC (ndb.)

In [None]:
# Baseline without a fairness term: fit the 'ACC-ndb' model from the shared
# starting thresholds and evaluate it.
model_clas = ETC_optimizeACC_ndb(alias='ACC-ndb')
model_clas.fit(
    X_clas_train, X_sen_train, y_clas_train,
    start_thresholds=start_thresholds,
    print_coeff=True,
)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# Full evaluation keyed by the model object; scalar metrics keyed by alias in
# the no-fairness table.
results_d10[model_clas] = dict(acc=acc, eo=eo, di=di, TPRs=TPRs)
results_nofairness[diameter][model_clas.alias] = {'acc': acc, 'eo': eo, 'di': di}

In [None]:
# score() on the training data returns seven values; only the sixth is kept,
# as acc_best_ndb (presumably an accuracy -- confirm against score()).
# The bare name on the last line displays the value as the cell output.
_,_,_,_,_,acc_best_ndb,_ = model_clas.score(X_clas_train, 
                                         y_clas_train,
                                         print_all_scores=False)
acc_best_ndb

In [None]:
# Approximate predictions of the current model on the training data; the first
# return value of predict_approx is discarded.
# NOTE(review): b_sigmoid and sum_threshold are not assigned in this section;
# they carry over from an earlier cell -- confirm that this is intended.
_, y_pred_approx = model_clas.predict_approx(X_clas_train,
                                             b_sigmoid=b_sigmoid, 
                                             sum_threshold=sum_threshold)

# Print Cov(sensitive feature, approximate prediction column 'y') once per
# column of X_sen_train.
sensitive_features = list(X_sen_train.columns)
for sensitive_feature in sensitive_features:
    x_sen = X_sen_train.loc[:,sensitive_feature]
    print('Sensitive feature:', sensitive_feature)
    print(Cov(x_sen, y_pred_approx.loc[:,'y']))

#### Method: Optimize ACC while enhancing fairness (ndb., log-barrier)

In [None]:
# Single fit of the 'ACC+COV-ndb-log' model (barrier='log') at one hand-picked
# (c, mu) pair.
model_clas = ETC_optimizeACC_F_ndb(alias='ACC+COV-ndb-log')
model_clas.fit(
    X_clas_train, X_sen_train, y_clas_train,
    start_thresholds=start_thresholds,
    c=0.14,  # values noted by the author: 0.07, ...,0.13, 0.14!, 0.15, ...., 0.42
    mu=0.3,  # value noted by the author: 0.3
    barrier='log',
    print_coeff=True,
)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# --- sweep configuration
start_hyper = 0.07
extra_hypers = []
model_clas = ETC_optimizeACC_F_ndb(alias='ACC+COV-ndb-log')

# --- stopping threshold: disparate impact of the comparison model
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
print(('disparate impact of approx. {} '
       'from {} algorithm is used as a stopping criterium.').format(
           round(di_nofairness, 5), comparison_algo))

# --- candidates: the extra values first, then 40 ascending steps of 0.01
Cs = extra_hypers + [round(start_hyper + i*0.01, 2) for i in range(40)]
print('\nHyperparameters to test:\n', Cs)
results_fairness[diameter][model_clas.alias] = {'ACCs': [], 'EOs': [], 'DIs': [], 'Cs': []}

for c in Cs:
    print('\nc:', c)
    # a new model instance is created for every candidate value
    model_clas = ETC_optimizeACC_F_ndb(alias='ACC+COV-ndb-log')
    model_clas.fit(
        X_clas_train, X_sen_train, y_clas_train,
        start_thresholds=start_thresholds,
        c=c,     # values noted by the author: 0.07, ...,0.13, 0.14!, 0.15, ...., 0.42
        mu=0.3,  # value noted by the author: 0.3
        barrier='log',
        print_coeff=False,
    )
    acc, eo, di, TPRs = evaluate(model_clas)
    # stop at the first candidate whose DI does not exceed the comparison DI;
    # that run is not stored
    if di <= di_nofairness:
        print('\nHyperparameter {} and larger were not used'.format(c))
        break
    results_d10[model_clas] = dict(acc=acc, eo=eo, di=di, TPRs=TPRs)
    results_fairness[diameter][model_clas.alias]['ACCs'] += [acc]
    results_fairness[diameter][model_clas.alias]['EOs'] += [eo]
    results_fairness[diameter][model_clas.alias]['DIs'] += [di]
    results_fairness[diameter][model_clas.alias]['Cs'] += [c]

#### Method: Optimize ACC while enhancing fairness (ndb., max-barrier)

In [None]:
# Single fit of the 'ACC+COV-ndb-max' model (barrier='max') at one hand-picked
# (c, mu) pair.
model_clas = ETC_optimizeACC_F_ndb(alias='ACC+COV-ndb-max')
model_clas.fit(
    X_clas_train, X_sen_train, y_clas_train,
    start_thresholds=start_thresholds,
    c=0.05,  # values noted by the author: 0.0, ..., 0.04, 0.05!, 0.06, 0.07
    mu=100,  # value noted by the author: 100
    barrier='max',
    print_coeff=True,
)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# --- sweep configuration
start_hyper = 0.0
extra_hypers = []
model_clas = ETC_optimizeACC_F_ndb(alias='ACC+COV-ndb-max')

# --- stopping threshold: disparate impact of the comparison model
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
print(('disparate impact of approx. {} '
       'from {} algorithm is used as a stopping criterium.').format(
           round(di_nofairness, 5), comparison_algo))

# --- candidates: the extra values first, then 40 ascending steps of 0.01
Cs = extra_hypers + [round(start_hyper + i*0.01, 2) for i in range(40)]
print('\nHyperparameters to test:\n', Cs)
results_fairness[diameter][model_clas.alias] = {'ACCs': [], 'EOs': [], 'DIs': [], 'Cs': []}

for c in Cs:
    print('\nc:', c)
    # a new model instance is created for every candidate value
    model_clas = ETC_optimizeACC_F_ndb(alias='ACC+COV-ndb-max')
    model_clas.fit(
        X_clas_train, X_sen_train, y_clas_train,
        start_thresholds=start_thresholds,
        c=c,     # values noted by the author: 0.0, ..., 0.04, 0.05!, 0.06, 0.07
        mu=100,  # value noted by the author: 100
        barrier='max',
        print_coeff=False,
    )
    acc, eo, di, TPRs = evaluate(model_clas)
    # stop at the first candidate whose DI does not exceed the comparison DI;
    # that run is not stored
    if di <= di_nofairness:
        print('\nHyperparameter {} and larger were not used'.format(c))
        break
    results_d10[model_clas] = dict(acc=acc, eo=eo, di=di, TPRs=TPRs)
    results_fairness[diameter][model_clas.alias]['ACCs'] += [acc]
    results_fairness[diameter][model_clas.alias]['EOs'] += [eo]
    results_fairness[diameter][model_clas.alias]['DIs'] += [di]
    results_fairness[diameter][model_clas.alias]['Cs'] += [c]

#### Method: Optimize COV (ndb., log-barrier)

In [None]:
# Display the value stored from the 'ACC-db' score() call on the training data.
acc_best_db

In [None]:
# Display the value stored from the 'ACC-ndb' score() call on the training data.
acc_best_ndb

In [None]:
# Reference value passed as acc_best= to the fits in the following sections;
# the db. value is the one chosen here.
acc_best = acc_best_db

In [None]:
# Single fit of the 'COV+ACC-ndb-log' model (barrier='log') at one hand-picked
# (mu, lamb) pair, with acc_best as reference value.
model_clas = ETC_optimizeCOV_ndb(alias='COV+ACC-ndb-log')
model_clas.fit(
    X_clas_train, X_sen_train, y_clas_train,
    start_thresholds=start_thresholds,
    mu=0.01,    # value noted by the author: 0.01
    lamb=0.11,  # values noted by the author: 0.03, ... ,0.1, 0.11!, 0.12, ..., 0.41
    barrier='log',
    acc_best=acc_best,
    print_coeff=True,
)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# Sweep lambda for the 'COV+ACC-ndb-log' model from start_hyper down to
# end_hyper in steps of 0.01. Every run whose disparate impact exceeds that of
# the comparison model is stored; the sweep stops at the first run that does not.

# --- specify hyperparameters and model class
start_hyper = 0.41
end_hyper = 0.03
model_clas = ETC_optimizeCOV_ndb(alias='COV+ACC-ndb-log')

# --- define constant for stopping criterion
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
print('disparate impact of approx. {} '\
      'from {} algorithm is used as a stopping criterium.'.format(round(di_nofairness,5),
                                                                        comparison_algo))

# --- test model for different hyperparameters (descending order)
Cs = [round(start_hyper-i*0.01,2) for i in range(0,50) if round(start_hyper-i*0.01,2)>=end_hyper]
print('\nHyperparameters to test:\n', Cs)
results_fairness[diameter][model_clas.alias] = dict()
results_fairness[diameter][model_clas.alias]['ACCs'] = list()
results_fairness[diameter][model_clas.alias]['EOs'] = list()
results_fairness[diameter][model_clas.alias]['DIs'] = list()
results_fairness[diameter][model_clas.alias]['Lambdas'] = list()
for lamb in Cs:
    print('\nlambda:', lamb)
    # --- train model for fixed hyperparameters (new instance per value)
    model_clas = ETC_optimizeCOV_ndb(alias='COV+ACC-ndb-log')
    model_clas.fit(X_clas_train,
                   X_sen_train,
                   y_clas_train,
                   start_thresholds=start_thresholds,
                   mu=0.01, #0.01
                   lamb=lamb, #0.03, ... ,0.1, 0.11!, 0.12, ..., 0.41
                   barrier='log',
                   acc_best=acc_best,
                   print_coeff=False)
    # --- evaluate model for fixed hyperparameters
    acc,eo,di,TPRs = evaluate(model_clas)
    # --- store evaluation until model is as unfair as comparison model
    if di <= di_nofairness:
        # lambda is swept downwards, so the values left untested are the
        # current one and everything smaller
        print('\nHyperparameter {} and smaller were not used'.format(lamb))
        break
    results_d10[model_clas] = {'acc':acc,'eo':eo,'di':di,'TPRs':TPRs}
    results_fairness[diameter][model_clas.alias]['ACCs'].append(acc)
    results_fairness[diameter][model_clas.alias]['EOs'].append(eo)
    results_fairness[diameter][model_clas.alias]['DIs'].append(di)
    results_fairness[diameter][model_clas.alias]['Lambdas'].append(lamb)

#### Method: Optimize COV (ndb., max-barrier)

In [None]:
# Single fit of the 'COV+ACC-ndb-max' model (barrier='max') at one hand-picked
# (mu, lamb) pair, with acc_best as reference value.
model_clas = ETC_optimizeCOV_ndb(alias='COV+ACC-ndb-max')
model_clas.fit(
    X_clas_train, X_sen_train, y_clas_train,
    start_thresholds=start_thresholds,
    mu=100,     # value noted by the author: 100
    lamb=0.15,  # values noted by the author: 0.0, ...,0.14, 0.15!, 0.16, ..., 0.39
    barrier='max',
    acc_best=acc_best,
    print_coeff=True,
)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# Sweep lambda for the 'COV+ACC-ndb-max' model from start_hyper down to 0 in
# steps of 0.01. Every run whose disparate impact exceeds that of the
# comparison model is stored; the sweep stops at the first run that does not.

# --- specify hyperparameters and model class
start_hyper = 0.39
model_clas = ETC_optimizeCOV_ndb(alias='COV+ACC-ndb-max')

# --- define constant for stopping criterion
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
print('disparate impact of approx. {} '\
      'from {} algorithm is used as a stopping criterium.'.format(round(di_nofairness,5),
                                                                        comparison_algo))

# --- test model for different hyperparameters (descending order)
Cs = [round(start_hyper-i*0.01,2) for i in range(0,40) if round(start_hyper-i*0.01,2)>=0]
print('\nHyperparameters to test:\n', Cs)
results_fairness[diameter][model_clas.alias] = dict()
results_fairness[diameter][model_clas.alias]['ACCs'] = list()
results_fairness[diameter][model_clas.alias]['EOs'] = list()
results_fairness[diameter][model_clas.alias]['DIs'] = list()
results_fairness[diameter][model_clas.alias]['Lambdas'] = list()
for lamb in Cs:
    print('\nlambda:', lamb)
    # --- train model for fixed hyperparameters (new instance per value)
    model_clas = ETC_optimizeCOV_ndb(alias='COV+ACC-ndb-max')
    model_clas.fit(X_clas_train,
                   X_sen_train,
                   y_clas_train,
                   start_thresholds=start_thresholds,
                   mu=100, #100
                   lamb=lamb, #0.0, ...,0.14, 0.15!, 0.16, ..., 0.39
                   barrier='max',
                   acc_best=acc_best,
                   print_coeff=False)
    # --- evaluate model for fixed hyperparameters
    acc,eo,di,TPRs = evaluate(model_clas)
    # --- store evaluation until model is as unfair as comparison model
    if di <= di_nofairness:
        # lambda is swept downwards, so the values left untested are the
        # current one and everything smaller
        print('\nHyperparameter {} and smaller were not used'.format(lamb))
        break
    results_d10[model_clas] = {'acc':acc,'eo':eo,'di':di,'TPRs':TPRs}
    results_fairness[diameter][model_clas.alias]['ACCs'].append(acc)
    results_fairness[diameter][model_clas.alias]['EOs'].append(eo)
    results_fairness[diameter][model_clas.alias]['DIs'].append(di)
    results_fairness[diameter][model_clas.alias]['Lambdas'].append(lamb)

#### Method: Optimize DI (ndb., log-barrier)

In [None]:
# Single fit of the 'DI+ACC-ndb-log' model (barrier='log') at one hand-picked
# (mu, lamb) pair, with acc_best as reference value.
model_clas = ETC_optimizeDI_ndb(alias='DI+ACC-ndb-log')
model_clas.fit(
    X_clas_train, X_sen_train, y_clas_train,
    start_thresholds=start_thresholds,
    mu=0.025,   # value noted by the author: 0.025
    lamb=0.03,  # values noted by the author: 0.03!, 0.04, ..., 0.45
    barrier='log',
    acc_best=acc_best,
    print_coeff=True,
)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# Sweep lambda for the 'DI+ACC-ndb-log' model from start_hyper down to
# end_hyper in steps of 0.01. Every run whose disparate impact exceeds that of
# the comparison model is stored; the sweep stops at the first run that does not.

# --- specify hyperparameters and model class
start_hyper = 0.45
end_hyper = 0.03
model_clas = ETC_optimizeDI_ndb(alias='DI+ACC-ndb-log')

# --- define constant for stopping criterion
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
print('disparate impact of approx. {} '\
      'from {} algorithm is used as a stopping criterium.'.format(round(di_nofairness,5),
                                                                        comparison_algo))

# --- test model for different hyperparameters (descending order)
Cs = [round(start_hyper-i*0.01,2) for i in range(0,50) if round(start_hyper-i*0.01,2)>=end_hyper]
print('\nHyperparameters to test:\n', Cs)
# start from an empty dict like the other sweep cells, so the entry holds
# only the four metric lists and no extra 'alias' key
results_fairness[diameter][model_clas.alias] = dict()
results_fairness[diameter][model_clas.alias]['ACCs'] = list()
results_fairness[diameter][model_clas.alias]['EOs'] = list()
results_fairness[diameter][model_clas.alias]['DIs'] = list()
results_fairness[diameter][model_clas.alias]['Lambdas'] = list()
for lamb in Cs:
    print('\nlambda:', lamb)
    # --- train model for fixed hyperparameters (new instance per value)
    model_clas = ETC_optimizeDI_ndb(alias='DI+ACC-ndb-log')
    model_clas.fit(X_clas_train,
                   X_sen_train,
                   y_clas_train,
                   start_thresholds=start_thresholds,
                   mu=0.025, #0.025
                   lamb=lamb, #0.03!, 0.04, ..., 0.45
                   barrier='log',
                   acc_best=acc_best,
                   print_coeff=False)
    # --- evaluate model for fixed hyperparameters
    acc,eo,di,TPRs = evaluate(model_clas)
    # --- store evaluation until model is as unfair as comparison model
    if di <= di_nofairness:
        # report the loop variable of THIS sweep (lamb); lambda is swept
        # downwards, so the untested values are the current one and smaller
        print('\nHyperparameter {} and smaller were not used'.format(lamb))
        break
    results_d10[model_clas] = {'acc':acc,'eo':eo,'di':di,'TPRs':TPRs}
    results_fairness[diameter][model_clas.alias]['ACCs'].append(acc)
    results_fairness[diameter][model_clas.alias]['EOs'].append(eo)
    results_fairness[diameter][model_clas.alias]['DIs'].append(di)
    results_fairness[diameter][model_clas.alias]['Lambdas'].append(lamb)

#### Method: Optimize DI (ndb., max-barrier)

In [None]:
# Single fit of the 'DI+ACC-ndb-max' model (barrier='max') at one hand-picked
# (mu, lamb) pair, with acc_best as reference value.
model_clas = ETC_optimizeDI_ndb(alias='DI+ACC-ndb-max')
model_clas.fit(
    X_clas_train, X_sen_train, y_clas_train,
    start_thresholds=start_thresholds,
    mu=100,     # value noted by the author: 100
    lamb=0.03,  # values noted by the author: 0.0, 0.01, 0.02, 0.03!, 0.04, ..., 0.39
    barrier='max',
    acc_best=acc_best,
    print_coeff=True,
)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# Sweep lambda for the 'DI+ACC-ndb-max' model from start_hyper down to 0 in
# steps of 0.01. Every run whose disparate impact exceeds that of the
# comparison model is stored; the sweep stops at the first run that does not.

# --- specify hyperparameters and model class
start_hyper = 0.39
model_clas = ETC_optimizeDI_ndb(alias='DI+ACC-ndb-max')

# --- define constant for stopping criterion
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
print('disparate impact of approx. {} '\
      'from {} algorithm is used as a stopping criterium.'.format(round(di_nofairness,5),
                                                                        comparison_algo))

# --- test model for different hyperparameters (descending order)
Cs = [round(start_hyper-i*0.01,2) for i in range(0,40) if round(start_hyper-i*0.01,2)>=0]
print('\nHyperparameters to test:\n', Cs)
results_fairness[diameter][model_clas.alias] = dict()
results_fairness[diameter][model_clas.alias]['ACCs'] = list()
results_fairness[diameter][model_clas.alias]['EOs'] = list()
results_fairness[diameter][model_clas.alias]['DIs'] = list()
results_fairness[diameter][model_clas.alias]['Lambdas'] = list()
for lamb in Cs:
    print('\nlambda:', lamb)
    # --- train model for fixed hyperparameters (new instance per value)
    model_clas = ETC_optimizeDI_ndb(alias='DI+ACC-ndb-max')
    model_clas.fit(X_clas_train,
                   X_sen_train,
                   y_clas_train,
                   start_thresholds=start_thresholds,
                   mu=100, #100
                   lamb=lamb, #0.0, 0.01, 0.02, 0.03!, 0.04, ..., 0.39
                   barrier='max',
                   acc_best=acc_best,
                   print_coeff=False)
    # --- evaluate model for fixed hyperparameters
    acc,eo,di,TPRs = evaluate(model_clas)
    # --- store evaluation until model is as unfair as comparison model
    if di <= di_nofairness:
        # lambda is swept downwards, so the values left untested are the
        # current one and everything smaller
        print('\nHyperparameter {} and smaller were not used'.format(lamb))
        break
    results_d10[model_clas] = {'acc':acc,'eo':eo,'di':di,'TPRs':TPRs}
    results_fairness[diameter][model_clas.alias]['ACCs'].append(acc)
    results_fairness[diameter][model_clas.alias]['EOs'].append(eo)
    results_fairness[diameter][model_clas.alias]['DIs'].append(di)
    results_fairness[diameter][model_clas.alias]['Lambdas'].append(lamb)

#### Method: Optimize EO (ndb., log-barrier)

In [None]:
# Single fit of the 'EO+ACC-ndb-log' model (barrier='log') at one hand-picked
# (mu, lamb) pair, with acc_best as reference value.
model_clas = ETC_optimizeEO_ndb(alias='EO+ACC-ndb-log')
model_clas.fit(
    X_clas_train, X_sen_train, y_clas_train,
    start_thresholds=start_thresholds,
    mu=0.025,   # value noted by the author: 0.025
    lamb=0.03,  # values noted by the author: 0.03!, 0.04, ..., 0.45
    barrier='log',
    acc_best=acc_best,
    print_coeff=True,
)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# Sweep lambda for the 'EO+ACC-ndb-log' model from start_hyper down to
# end_hyper in steps of 0.01. Every run whose equal-opportunity value is below
# that of the comparison model is stored; the sweep stops at the first run
# where it is not.

# --- specify hyperparameters and model class
start_hyper = 0.45
end_hyper = 0.03
model_clas = ETC_optimizeEO_ndb(alias='EO+ACC-ndb-log')

# --- define constant for stopping criterion
comparison_algo = comparisons[model_clas.alias]
eo_nofairness = results_nofairness[diameter][comparison_algo]['eo']
print('equal opportunity of approx. {} '\
      'from {} algorithm is used as a stopping criterium.'.format(round(eo_nofairness,5),
                                                                        comparison_algo))

# --- test model for different hyperparameters (descending order)
Cs = [round(start_hyper-i*0.01,2) for i in range(0,50) if round(start_hyper-i*0.01,2)>=end_hyper]
print('\nHyperparameters to test:\n', Cs)
results_fairness[diameter][model_clas.alias] = dict()
results_fairness[diameter][model_clas.alias]['ACCs'] = list()
results_fairness[diameter][model_clas.alias]['EOs'] = list()
results_fairness[diameter][model_clas.alias]['DIs'] = list()
results_fairness[diameter][model_clas.alias]['Lambdas'] = list()
for lamb in Cs:
    print('\nlambda:', lamb)
    # --- train model for fixed hyperparameters (new instance per value)
    model_clas = ETC_optimizeEO_ndb(alias='EO+ACC-ndb-log')
    model_clas.fit(X_clas_train,
                   X_sen_train,
                   y_clas_train,
                   start_thresholds=start_thresholds,
                   mu=0.025, #0.025
                   lamb=lamb, #0.03!, 0.04, ..., 0.45
                   acc_best=acc_best,
                   barrier='log',
                   print_coeff=False)
    # --- evaluate model for fixed hyperparameters
    acc,eo,di,TPRs = evaluate(model_clas)
    # --- store evaluation until model is as unfair as comparison model
    if eo >= eo_nofairness:
        # report the loop variable of THIS sweep (lamb); lambda is swept
        # downwards, so the untested values are the current one and smaller
        print('\nHyperparameter {} and smaller were not used'.format(lamb))
        break
    results_d10[model_clas] = {'acc':acc,'eo':eo,'di':di,'TPRs':TPRs}
    results_fairness[diameter][model_clas.alias]['ACCs'].append(acc)
    results_fairness[diameter][model_clas.alias]['EOs'].append(eo)
    results_fairness[diameter][model_clas.alias]['DIs'].append(di)
    results_fairness[diameter][model_clas.alias]['Lambdas'].append(lamb)

#### Method: Optimize EO (ndb., max-barrier)

In [None]:
# Single fit of the 'EO+ACC-ndb-max' model (barrier='max') at one hand-picked
# (mu, lamb) pair, with acc_best as reference value.
model_clas = ETC_optimizeEO_ndb(alias='EO+ACC-ndb-max')
model_clas.fit(
    X_clas_train, X_sen_train, y_clas_train,
    start_thresholds=start_thresholds,
    mu=100,     # value noted by the author: 100
    lamb=0.03,  # values noted by the author: 0.00, 0.01, 0.02, 0.03!, 0.04, ..., 0.39
    barrier='max',
    acc_best=acc_best,
    print_coeff=True,
)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# Sweep lambda for the 'EO+ACC-ndb-max' model from start_hyper down to 0 in
# steps of 0.01. Every run whose equal-opportunity value is below that of the
# comparison model is stored; the sweep stops at the first run where it is not.

# --- specify hyperparameters and model class
start_hyper = 0.39
model_clas = ETC_optimizeEO_ndb(alias='EO+ACC-ndb-max')

# --- define constant for stopping criterion
comparison_algo = comparisons[model_clas.alias]
eo_nofairness = results_nofairness[diameter][comparison_algo]['eo']
print('equal opportunity of approx. {} '\
      'from {} algorithm is used as a stopping criterium.'.format(round(eo_nofairness,5),
                                                                        comparison_algo))

# --- test model for different hyperparameters (descending order)
Cs = [round(start_hyper-i*0.01,2) for i in range(0,40) if round(start_hyper-i*0.01,2)>=0]
print('\nHyperparameters to test:\n', Cs)
results_fairness[diameter][model_clas.alias] = dict()
results_fairness[diameter][model_clas.alias]['ACCs'] = list()
results_fairness[diameter][model_clas.alias]['EOs'] = list()
results_fairness[diameter][model_clas.alias]['DIs'] = list()
results_fairness[diameter][model_clas.alias]['Lambdas'] = list()
for lamb in Cs:
    print('\nlambda:', lamb)
    # --- train model for fixed hyperparameters (new instance per value)
    model_clas = ETC_optimizeEO_ndb(alias='EO+ACC-ndb-max')
    model_clas.fit(X_clas_train,
                   X_sen_train,
                   y_clas_train,
                   start_thresholds=start_thresholds,
                   mu=100, #100
                   lamb=lamb, #0.00, 0.01, 0.02, 0.03!, 0.04, ..., 0.39
                   barrier='max',
                   acc_best=acc_best,
                   print_coeff=False)
    # --- evaluate model for fixed hyperparameters
    acc,eo,di,TPRs = evaluate(model_clas)
    # --- store evaluation until model is as unfair as comparison model
    if eo >= eo_nofairness:
        # lambda is swept downwards, so the values left untested are the
        # current one and everything smaller
        print('\nHyperparameter {} and smaller were not used'.format(lamb))
        break
    # this cell belongs to the diameter-10 section, so the run goes into
    # results_d10 like every other cell of the section (not results_d5)
    results_d10[model_clas] = {'acc':acc,'eo':eo,'di':di,'TPRs':TPRs}
    results_fairness[diameter][model_clas.alias]['ACCs'].append(acc)
    results_fairness[diameter][model_clas.alias]['EOs'].append(eo)
    results_fairness[diameter][model_clas.alias]['DIs'].append(di)
    results_fairness[diameter][model_clas.alias]['Lambdas'].append(lamb)

### Diameter = 15

In [None]:
# Restrict the training and test data to the leak settings with diameter 15.
diameter = 15
results = filter_diameter_Hanoi(
    X_clas, y_clas, X_sen,
    diameter=diameter,
    df_information=df_information,
)
(X_clas_train, X_clas_test,
 y_clas_train, y_clas_test,
 X_sen_train, X_sen_test) = results
print(X_sen_train.sum())
print(X_sen_test.sum())

# Empty containers for this diameter; the cells below fill them and the
# results are visualized at the end.
results_d15 = {}
results_fairness[diameter] = {}
results_nofairness[diameter] = {}

#### Method: Choose hyperparameter

In [None]:
# Fit ETC_hyperparameter on the training features only (no labels or sensitive
# features are passed to fit here) and evaluate it.
model_clas = ETC_hyperparameter()
model_clas.fit(X_clas_train, factor=0.2, print_coeff=True)
acc,eo,di,TPRs = evaluate(model_clas)
# define the starting point for all other algorithms:
# every later fit in this section receives these thresholds as start_thresholds
start_thresholds = model_clas.thresholds

In [None]:
# Print one ampersand-separated row (e.g. for pasting into a LaTeX table):
# acc & max TPR & min TPR & di & eo & 1-eo/max TPR & (1-di)*max TPR,
# each rounded to four decimals.
max_TPR = max(TPRs.values())
min_TPR = min(TPRs.values())
print('{} & {} & {} & {} & {} & {} & {}'.format(
    *(round(value, 4) for value in (acc,
                                    max_TPR,
                                    min_TPR,
                                    di,
                                    eo,
                                    (1-eo/max_TPR),
                                    (1-di)*max_TPR))))

In [None]:
# Full evaluation keyed by the model object; scalar metrics keyed by alias in
# the no-fairness table.
results_d15[model_clas] = dict(acc=acc, eo=eo, di=di, TPRs=TPRs)
results_nofairness[diameter][model_clas.alias] = {'acc': acc, 'eo': eo, 'di': di}

#### Method: Optimize TPR - FPR (db.)

In [None]:
# Baseline without a fairness term: fit the 'TFPR-db' model from the shared
# starting thresholds and evaluate it.
# b_sigmoid and sum_threshold stay notebook-level names because the
# predict_approx cell of this section reads them again.
b_sigmoid, sum_threshold = 100, 0.8

model_clas = ETC_optimizeFTPR_db(alias='TFPR-db')
model_clas.fit(
    X_clas_train, X_sen_train, y_clas_train,
    start_thresholds=start_thresholds,
    b_sigmoid=b_sigmoid,
    sum_threshold=sum_threshold,
    print_coeff=True,
)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# Full evaluation keyed by the model object; scalar metrics keyed by alias in
# the no-fairness table (the sweep cells look up their stopping threshold there).
results_d15[model_clas] = dict(acc=acc, eo=eo, di=di, TPRs=TPRs)
results_nofairness[diameter][model_clas.alias] = {'acc': acc, 'eo': eo, 'di': di}

In [None]:
# Approximate predictions of the current model on the training data; the first
# return value of predict_approx is discarded.
_, y_pred_approx = model_clas.predict_approx(X_clas_train,
                                             b_sigmoid=b_sigmoid, 
                                             sum_threshold=sum_threshold)

# Print Cov(sensitive feature, approximate prediction column 'y') once per
# column of X_sen_train.
sensitive_features = list(X_sen_train.columns)
for sensitive_feature in sensitive_features:
    x_sen = X_sen_train.loc[:,sensitive_feature]
    print('Sensitive feature:', sensitive_feature)
    print(Cov(x_sen, y_pred_approx.loc[:,'y']))

#### Method: Optimize TPR - FPR while enhancing fairness (db.)

In [None]:
# Single fit of the 'TFPR+COV-db-log' model at one hand-picked (c, mu) pair.
b_sigmoid, sum_threshold = 100, 0.8

model_clas = ETC_optimizeFTPR_F_db(alias='TFPR+COV-db-log')
model_clas.fit(
    X_clas_train, X_sen_train, y_clas_train,
    start_thresholds=start_thresholds,
    b_sigmoid=b_sigmoid,
    sum_threshold=sum_threshold,
    c=0.11,  # values noted by the author: 0.09, 0.1, 0.11!, 0.12, ..., 0.17
    mu=0.2,  # values noted by the author: 0.2, 0.25
    print_coeff=True,
)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# --- sweep configuration
start_hyper = 0.09
extra_hypers = []
b_sigmoid, sum_threshold = 100, 0.8
model_clas = ETC_optimizeFTPR_F_db(alias='TFPR+COV-db-log')

# --- stopping threshold: disparate impact of the comparison model
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
print(('disparate impact of approx. {} '
       'from {} algorithm is used as a stopping criterium.').format(
           round(di_nofairness, 5), comparison_algo))

# --- candidates: the extra values first, then 40 ascending steps of 0.01
Cs = extra_hypers + [round(start_hyper + i*0.01, 2) for i in range(40)]
print('\nHyperparameters to test:\n', Cs)
results_fairness[diameter][model_clas.alias] = {'ACCs': [], 'EOs': [], 'DIs': [], 'Cs': []}

for c in Cs:
    print('\nc:', c)
    # a new model instance is created for every candidate value
    model_clas = ETC_optimizeFTPR_F_db(alias='TFPR+COV-db-log')
    model_clas.fit(
        X_clas_train, X_sen_train, y_clas_train,
        start_thresholds=start_thresholds,
        b_sigmoid=b_sigmoid,
        sum_threshold=sum_threshold,
        c=c,     # values noted by the author: 0.09, 0.1, 0.11!, 0.12, ..., 0.17
        mu=0.2,  # values noted by the author: 0.2, 0.25
        print_coeff=False,
    )
    acc, eo, di, TPRs = evaluate(model_clas)
    # stop at the first candidate whose DI does not exceed the comparison DI;
    # that run is not stored
    if di <= di_nofairness:
        print('\nHyperparameter {} and larger were not used'.format(c))
        break
    results_d15[model_clas] = dict(acc=acc, eo=eo, di=di, TPRs=TPRs)
    results_fairness[diameter][model_clas.alias]['ACCs'] += [acc]
    results_fairness[diameter][model_clas.alias]['EOs'] += [eo]
    results_fairness[diameter][model_clas.alias]['DIs'] += [di]
    results_fairness[diameter][model_clas.alias]['Cs'] += [c]

#### Method: Optimize TPR - FPR  (ndb.)

In [None]:
# Baseline without a fairness term: fit the 'TFPR-ndb' model from the shared
# starting thresholds and evaluate it.
model_clas = ETC_optimizeFTPR_ndb(alias='TFPR-ndb')
model_clas.fit(
    X_clas_train, X_sen_train, y_clas_train,
    start_thresholds=start_thresholds,
    print_coeff=True,
)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# Full evaluation keyed by the model object; scalar metrics keyed by alias in
# the no-fairness table.
results_d15[model_clas] = dict(acc=acc, eo=eo, di=di, TPRs=TPRs)
results_nofairness[diameter][model_clas.alias] = {'acc': acc, 'eo': eo, 'di': di}

In [None]:
# Approximate predictions of the current model on the training data; the first
# return value of predict_approx is discarded.
# NOTE(review): b_sigmoid and sum_threshold are not assigned in this section;
# they carry over from an earlier cell -- confirm that this is intended.
_, y_pred_approx = model_clas.predict_approx(X_clas_train,
                                             b_sigmoid=b_sigmoid, 
                                             sum_threshold=sum_threshold)

# Print Cov(sensitive feature, approximate prediction column 'y') once per
# column of X_sen_train.
sensitive_features = list(X_sen_train.columns)
for sensitive_feature in sensitive_features:
    x_sen = X_sen_train.loc[:,sensitive_feature]
    print('Sensitive feature:', sensitive_feature)
    print(Cov(x_sen, y_pred_approx.loc[:,'y']))

#### Method: Optimize TPR - FPR while enhancing fairness (ndb., log-barrier)

In [None]:
# Single reference fit of 'TFPR+COV-ndb-log' with the log barrier.
# c: tried 0.09, 0.1, 0.11!, 0.12, ..., 0.2 ('!' marks the value used here)
# mu: 0.25
model_clas = ETC_optimizeFTPR_F_ndb(alias='TFPR+COV-ndb-log')
fit_options = dict(start_thresholds=start_thresholds,
                   c=0.11,
                   mu=0.25,
                   barrier='log',
                   print_coeff=True)
model_clas.fit(X_clas_train, X_sen_train, y_clas_train, **fit_options)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# --- sweep configuration: first grid value, optional extra values, model alias
start_hyper = 0.09
extra_hypers = []
model_clas = ETC_optimizeFTPR_F_ndb(alias='TFPR+COV-ndb-log')

# --- stopping criterion: disparate impact of the comparison model
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
stop_msg = ('Disparate impact of approx. {} '
            'from {} algorithm is used as a stopping criterium.')
print(stop_msg.format(round(di_nofairness, 5), comparison_algo))

# --- grid: extra values first, then 40 ascending steps of 0.01 from start_hyper
Cs = extra_hypers + [round(start_hyper + step*0.01, 2) for step in range(40)]
print('\nHyperparameters to test:\n', Cs)
sweep_log = {'ACCs': [], 'EOs': [], 'DIs': [], 'Cs': []}
results_fairness[diameter][model_clas.alias] = sweep_log

for c in Cs:
    print('\nc:', c)
    # fresh model for every grid value (mu fixed at 0.25, log barrier)
    model_clas = ETC_optimizeFTPR_F_ndb(alias='TFPR+COV-ndb-log')
    fit_options = dict(start_thresholds=start_thresholds,
                       c=c,
                       mu=0.25,
                       barrier='log',
                       print_coeff=False)
    model_clas.fit(X_clas_train, X_sen_train, y_clas_train, **fit_options)
    acc, eo, di, TPRs = evaluate(model_clas)
    # keep results only until the model is as unfair as the comparison model
    if di <= di_nofairness:
        print('\nHyperparameter {} and larger were not used'.format(c))
        break
    results_d15[model_clas] = dict(acc=acc, eo=eo, di=di, TPRs=TPRs)
    sweep_log['ACCs'].append(acc)
    sweep_log['EOs'].append(eo)
    sweep_log['DIs'].append(di)
    sweep_log['Cs'].append(c)

#### Method: Optimize TPR - FPR while enhancing fairness (ndb., max-barrier)

In [None]:
# Single reference fit of 'TFPR+COV-ndb-max' with the max barrier.
# c: tried 0.0, ..., 0.06, 0.07!, 0.08 ('!' marks the value used here)
# mu: 100
model_clas = ETC_optimizeFTPR_F_ndb(alias='TFPR+COV-ndb-max')
fit_options = dict(start_thresholds=start_thresholds,
                   c=0.07,
                   mu=100,
                   barrier='max',
                   print_coeff=True)
model_clas.fit(X_clas_train, X_sen_train, y_clas_train, **fit_options)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# --- sweep configuration: first grid value, optional extra values, model alias
start_hyper = 0.0
extra_hypers = []
model_clas = ETC_optimizeFTPR_F_ndb(alias='TFPR+COV-ndb-max')

# --- stopping criterion: disparate impact of the comparison model
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
stop_msg = ('Disparate impact of approx. {} '
            'from {} algorithm is used as a stopping criterium.')
print(stop_msg.format(round(di_nofairness, 5), comparison_algo))

# --- grid: extra values first, then 40 ascending steps of 0.01 from start_hyper
Cs = extra_hypers + [round(start_hyper + step*0.01, 2) for step in range(40)]
print('\nHyperparameters to test:\n', Cs)
sweep_log = {'ACCs': [], 'EOs': [], 'DIs': [], 'Cs': []}
results_fairness[diameter][model_clas.alias] = sweep_log

for c in Cs:
    print('\nc:', c)
    # fresh model for every grid value (mu fixed at 100, max barrier)
    model_clas = ETC_optimizeFTPR_F_ndb(alias='TFPR+COV-ndb-max')
    fit_options = dict(start_thresholds=start_thresholds,
                       c=c,
                       mu=100,
                       barrier='max',
                       print_coeff=False)
    model_clas.fit(X_clas_train, X_sen_train, y_clas_train, **fit_options)
    acc, eo, di, TPRs = evaluate(model_clas)
    # keep results only until the model is as unfair as the comparison model
    if di <= di_nofairness:
        print('\nHyperparameter {} and larger were not used'.format(c))
        break
    results_d15[model_clas] = dict(acc=acc, eo=eo, di=di, TPRs=TPRs)
    sweep_log['ACCs'].append(acc)
    sweep_log['EOs'].append(eo)
    sweep_log['DIs'].append(di)
    sweep_log['Cs'].append(c)

#### Method: Optimize ACC (db.)

In [None]:
# Settings forwarded to fit() as b_sigmoid / sum_threshold for the 'db' model.
b_sigmoid = 100
sum_threshold = 0.8

# Train the 'ACC-db' model (no fairness-related hyperparameters are passed)
# and evaluate it; the fitted coefficients are printed.
model_clas = ETC_optimizeACC_db(alias='ACC-db')
fit_options = dict(start_thresholds=start_thresholds,
                   b_sigmoid=b_sigmoid,
                   sum_threshold=sum_threshold,
                   print_coeff=True)
model_clas.fit(X_clas_train, X_sen_train, y_clas_train, **fit_options)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# Record the metrics of the model evaluated in the previous cell:
# all four values keyed by the model object, and the three scalar scores
# keyed by the model's alias (read later as stopping criteria).
results_d15[model_clas] = dict(acc=acc, eo=eo, di=di, TPRs=TPRs)
results_nofairness[diameter][model_clas.alias] = dict(acc=acc, eo=eo, di=di)

In [None]:
# Score the current model on the training data. score() is unpacked into
# seven values; only the sixth is kept (as acc_best_db), the rest are discarded.
# NOTE(review): presumably the sixth value is an accuracy - confirm against score().
_,_,_,_,_,acc_best_db,_ = model_clas.score(X_clas_train, 
                                        y_clas_train,
                                        print_all_scores=False)
# Show the retained value as the cell output.
acc_best_db

In [None]:
# Approximate predictions of the current model on the training data.
_, y_pred_approx = model_clas.predict_approx(X_clas_train,
                                             sum_threshold=sum_threshold,
                                             b_sigmoid=b_sigmoid)

# For every sensitive feature, print its covariance with the approximate
# overall prediction (column 'y').
sensitive_features = list(X_sen_train.columns)
for sensitive_feature in sensitive_features:
    x_sen = X_sen_train.loc[:, sensitive_feature]
    print('Sensitive feature:', sensitive_feature)
    cov_with_prediction = Cov(x_sen, y_pred_approx.loc[:, 'y'])
    print(cov_with_prediction)

#### Method: Optimize ACC while enhancing fairness (db.)

In [None]:
# Settings forwarded to fit() as b_sigmoid / sum_threshold for the 'db' model.
b_sigmoid = 100
sum_threshold = 0.8

# Single reference fit of 'ACC+COV-db-log'.
# c: tried 0.082, 0.085, 0.09!, 0.1, 0.11, 0.12 ('!' marks the value used here)
# mu: 0.05
model_clas = ETC_optimizeACC_F_db(alias='ACC+COV-db-log')
fit_options = dict(start_thresholds=start_thresholds,
                   b_sigmoid=b_sigmoid,
                   sum_threshold=sum_threshold,
                   c=0.09,
                   mu=0.05,
                   print_coeff=True)
model_clas.fit(X_clas_train, X_sen_train, y_clas_train, **fit_options)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# --- sweep configuration: first grid value, extra values, db settings, model alias
start_hyper = 0.09
extra_hypers = [0.082, 0.085]
b_sigmoid = 100
sum_threshold = 0.8
model_clas = ETC_optimizeACC_F_db(alias='ACC+COV-db-log')

# --- stopping criterion: disparate impact of the comparison model
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
stop_msg = ('disparate impact of approx. {} '
            'from {} algorithm is used as a stopping criterium.')
print(stop_msg.format(round(di_nofairness, 5), comparison_algo))

# --- grid: extra values first, then 40 ascending steps of 0.01 from start_hyper
Cs = extra_hypers + [round(start_hyper + step*0.01, 2) for step in range(40)]
print('\nHyperparameters to test:\n', Cs)
sweep_log = {'ACCs': [], 'EOs': [], 'DIs': [], 'Cs': []}
results_fairness[diameter][model_clas.alias] = sweep_log

for c in Cs:
    print('\nc:', c)
    # fresh model for every grid value (mu fixed at 0.05)
    model_clas = ETC_optimizeACC_F_db(alias='ACC+COV-db-log')
    fit_options = dict(start_thresholds=start_thresholds,
                       b_sigmoid=b_sigmoid,
                       sum_threshold=sum_threshold,
                       c=c,
                       mu=0.05,
                       print_coeff=False)
    model_clas.fit(X_clas_train, X_sen_train, y_clas_train, **fit_options)
    acc, eo, di, TPRs = evaluate(model_clas)
    # keep results only until the model is as unfair as the comparison model
    if di <= di_nofairness:
        print('\nHyperparameter {} and larger were not used'.format(c))
        break
    results_d15[model_clas] = dict(acc=acc, eo=eo, di=di, TPRs=TPRs)
    sweep_log['ACCs'].append(acc)
    sweep_log['EOs'].append(eo)
    sweep_log['DIs'].append(di)
    sweep_log['Cs'].append(c)

#### Method: Optimize ACC (ndb.)

In [None]:
# Train the 'ACC-ndb' model (no fairness-related hyperparameters are passed)
# and evaluate it; the fitted coefficients are printed.
model_clas = ETC_optimizeACC_ndb(alias='ACC-ndb')
fit_options = dict(start_thresholds=start_thresholds,
                   print_coeff=True)
model_clas.fit(X_clas_train, X_sen_train, y_clas_train, **fit_options)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# Record the metrics of the model evaluated in the previous cell:
# all four values keyed by the model object, and the three scalar scores
# keyed by the model's alias (read later as stopping criteria).
results_d15[model_clas] = dict(acc=acc, eo=eo, di=di, TPRs=TPRs)
results_nofairness[diameter][model_clas.alias] = dict(acc=acc, eo=eo, di=di)

In [None]:
# Score the current model on the training data. score() is unpacked into
# seven values; only the sixth is kept (as acc_best_ndb), the rest are discarded.
# NOTE(review): presumably the sixth value is an accuracy - confirm against score().
_,_,_,_,_,acc_best_ndb,_ = model_clas.score(X_clas_train, 
                                         y_clas_train,
                                         print_all_scores=False)
# Show the retained value as the cell output.
acc_best_ndb

In [None]:
# Approximate predictions of the current model on the training data.
# b_sigmoid and sum_threshold are whatever earlier cells last assigned.
_, y_pred_approx = model_clas.predict_approx(X_clas_train,
                                             sum_threshold=sum_threshold,
                                             b_sigmoid=b_sigmoid)

# For every sensitive feature, print its covariance with the approximate
# overall prediction (column 'y').
sensitive_features = list(X_sen_train.columns)
for sensitive_feature in sensitive_features:
    x_sen = X_sen_train.loc[:, sensitive_feature]
    print('Sensitive feature:', sensitive_feature)
    cov_with_prediction = Cov(x_sen, y_pred_approx.loc[:, 'y'])
    print(cov_with_prediction)

#### Method: Optimize ACC while enhancing fairness (ndb., log-barrier)

In [None]:
# Single reference fit of 'ACC+COV-ndb-log'.
# The barrier is passed explicitly so this cell does not depend on fit()'s
# default and agrees with the alias and with the hyperparameter sweep for
# the same alias, which passes barrier='log'.
model_clas = ETC_optimizeACC_F_ndb(alias='ACC+COV-ndb-log')
model_clas.fit(X_clas_train, 
               X_sen_train,
               y_clas_train,
               start_thresholds=start_thresholds,
               c=0.09, #0.082, 0.085, 0.09!, 0.1, ..., 0.17 ('!' marks the value used)
               mu=0.05, #0.05
               barrier='log',
               print_coeff=True)
# evaluate() yields accuracy, equal opportunity, disparate impact and the TPRs
acc,eo,di,TPRs = evaluate(model_clas)

In [None]:
# --- sweep configuration: first grid value, extra values, model alias
start_hyper = 0.09
extra_hypers = [0.082, 0.085]
model_clas = ETC_optimizeACC_F_ndb(alias='ACC+COV-ndb-log')

# --- stopping criterion: disparate impact of the comparison model
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
stop_msg = ('disparate impact of approx. {} '
            'from {} algorithm is used as a stopping criterium.')
print(stop_msg.format(round(di_nofairness, 5), comparison_algo))

# --- grid: extra values first, then 40 ascending steps of 0.01 from start_hyper
Cs = extra_hypers + [round(start_hyper + step*0.01, 2) for step in range(40)]
print('\nHyperparameters to test:\n', Cs)
sweep_log = {'ACCs': [], 'EOs': [], 'DIs': [], 'Cs': []}
results_fairness[diameter][model_clas.alias] = sweep_log

for c in Cs:
    print('\nc:', c)
    # fresh model for every grid value (mu fixed at 0.05, log barrier)
    model_clas = ETC_optimizeACC_F_ndb(alias='ACC+COV-ndb-log')
    fit_options = dict(start_thresholds=start_thresholds,
                       c=c,
                       mu=0.05,
                       barrier='log',
                       print_coeff=False)
    model_clas.fit(X_clas_train, X_sen_train, y_clas_train, **fit_options)
    acc, eo, di, TPRs = evaluate(model_clas)
    # keep results only until the model is as unfair as the comparison model
    if di <= di_nofairness:
        print('\nHyperparameter {} and larger were not used'.format(c))
        break
    results_d15[model_clas] = dict(acc=acc, eo=eo, di=di, TPRs=TPRs)
    sweep_log['ACCs'].append(acc)
    sweep_log['EOs'].append(eo)
    sweep_log['DIs'].append(di)
    sweep_log['Cs'].append(c)

#### Method: Optimize ACC while enhancing fairness (ndb., max-barrier)

In [None]:
# Single reference fit of 'ACC+COV-ndb-max' with the max barrier.
# c: tried 0.0, ..., 0.06, 0.07!, 0.08 ('!' marks the value used here)
# mu: 100
model_clas = ETC_optimizeACC_F_ndb(alias='ACC+COV-ndb-max')
fit_options = dict(start_thresholds=start_thresholds,
                   c=0.07,
                   mu=100,
                   barrier='max',
                   print_coeff=True)
model_clas.fit(X_clas_train, X_sen_train, y_clas_train, **fit_options)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# --- sweep configuration: first grid value, optional extra values, model alias
start_hyper = 0.0
extra_hypers = []
model_clas = ETC_optimizeACC_F_ndb(alias='ACC+COV-ndb-max')

# --- stopping criterion: disparate impact of the comparison model
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
stop_msg = ('disparate impact of approx. {} '
            'from {} algorithm is used as a stopping criterium.')
print(stop_msg.format(round(di_nofairness, 5), comparison_algo))

# --- grid: extra values first, then 40 ascending steps of 0.01 from start_hyper
Cs = extra_hypers + [round(start_hyper + step*0.01, 2) for step in range(40)]
print('\nHyperparameters to test:\n', Cs)
sweep_log = {'ACCs': [], 'EOs': [], 'DIs': [], 'Cs': []}
results_fairness[diameter][model_clas.alias] = sweep_log

for c in Cs:
    print('\nc:', c)
    # fresh model for every grid value (mu fixed at 100, max barrier)
    model_clas = ETC_optimizeACC_F_ndb(alias='ACC+COV-ndb-max')
    fit_options = dict(start_thresholds=start_thresholds,
                       c=c,
                       mu=100,
                       barrier='max',
                       print_coeff=False)
    model_clas.fit(X_clas_train, X_sen_train, y_clas_train, **fit_options)
    acc, eo, di, TPRs = evaluate(model_clas)
    # keep results only until the model is as unfair as the comparison model
    if di <= di_nofairness:
        print('\nHyperparameter {} and larger were not used'.format(c))
        break
    results_d15[model_clas] = dict(acc=acc, eo=eo, di=di, TPRs=TPRs)
    sweep_log['ACCs'].append(acc)
    sweep_log['EOs'].append(eo)
    sweep_log['DIs'].append(di)
    sweep_log['Cs'].append(c)

#### Method: Optimize COV (ndb., log-barrier)

In [None]:
# Display the value kept from the 'ACC-db' model's score() call.
acc_best_db

In [None]:
# Display the value kept from the 'ACC-ndb' model's score() call.
acc_best_ndb

In [None]:
# Reference value handed to the COV/DI/EO optimisers below via `acc_best=`;
# the db value is chosen here rather than acc_best_ndb.
acc_best = acc_best_db

In [None]:
# Single reference fit of 'COV+ACC-ndb-log' with the log barrier.
# mu: 0.01
# lamb: tried 0.02, ..., 0.04, 0.05!, 0.06, ..., 0.45 ('!' marks the value used here)
model_clas = ETC_optimizeCOV_ndb(alias='COV+ACC-ndb-log')
fit_options = dict(start_thresholds=start_thresholds,
                   mu=0.01,
                   lamb=0.05,
                   barrier='log',
                   acc_best=acc_best,
                   print_coeff=True)
model_clas.fit(X_clas_train, X_sen_train, y_clas_train, **fit_options)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# --- specify hyperparameters and model class
start_hyper = 0.45
model_clas = ETC_optimizeCOV_ndb(alias='COV+ACC-ndb-log')

# --- define constant for the stopping criterion:
#     disparate impact of the comparison model stored in results_nofairness
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
print('disparate impact of approx. {} '\
      'from {} algorithm is used as a stopping criterium.'.format(round(di_nofairness,5),
                                                                        comparison_algo))

# --- test model for different hyperparameters:
#     lambda descends from start_hyper in steps of 0.01; only values > 0 are kept
Cs = [round(start_hyper-i*0.01,2) for i in range(0,50) if round(start_hyper-i*0.01,2)>0]
print('\nHyperparameters to test:\n', Cs)
results_fairness[diameter][model_clas.alias] = {'ACCs': [], 'EOs': [], 'DIs': [], 'Lambdas': []}
for lamb in Cs:
    print('\nlambda:', lamb)
    # --- train model for fixed hyperparameters
    model_clas = ETC_optimizeCOV_ndb(alias='COV+ACC-ndb-log')
    model_clas.fit(X_clas_train,
                   X_sen_train,
                   y_clas_train,
                   start_thresholds=start_thresholds,
                   mu=0.01, #0.01
                   lamb=lamb, #0.02, ..., 0.04, 0.05!, 0.06, ..., 0.45
                   barrier='log',
                   acc_best=acc_best,
                   print_coeff=False)
    # --- evaluate model for fixed hyperparameters
    acc,eo,di,TPRs = evaluate(model_clas)
    # --- store evaluation until model is as unfair as comparison model
    if di <= di_nofairness:
        # the grid is descending, so every skipped value is smaller than lamb
        print('\nHyperparameter {} and smaller were not used'.format(lamb))
        break
    results_d15[model_clas] = {'acc':acc,'eo':eo,'di':di,'TPRs':TPRs}
    results_fairness[diameter][model_clas.alias]['ACCs'].append(acc)
    results_fairness[diameter][model_clas.alias]['EOs'].append(eo)
    results_fairness[diameter][model_clas.alias]['DIs'].append(di)
    results_fairness[diameter][model_clas.alias]['Lambdas'].append(lamb)

#### Method: Optimize COV (ndb., max-barrier)

In [None]:
# Single reference fit of 'COV+ACC-ndb-max' with the max barrier.
# mu: 100
# lamb: tried 0.01, ..., 0.08, 0.09!, 0.1, ..., 0.44 ('!' marks the value used here)
model_clas = ETC_optimizeCOV_ndb(alias='COV+ACC-ndb-max')
fit_options = dict(start_thresholds=start_thresholds,
                   mu=100,
                   lamb=0.09,
                   barrier='max',
                   acc_best=acc_best,
                   print_coeff=True)
model_clas.fit(X_clas_train, X_sen_train, y_clas_train, **fit_options)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# --- specify hyperparameters and model class
start_hyper = 0.44
model_clas = ETC_optimizeCOV_ndb(alias='COV+ACC-ndb-max')

# --- define constant for the stopping criterion:
#     disparate impact of the comparison model stored in results_nofairness
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
print('disparate impact of approx. {} '\
      'from {} algorithm is used as a stopping criterium.'.format(round(di_nofairness,5),
                                                                        comparison_algo))

# --- test model for different hyperparameters:
#     lambda descends from start_hyper in steps of 0.01; values >= 0 are kept
Cs = [round(start_hyper-i*0.01,2) for i in range(0,50) if round(start_hyper-i*0.01,2)>=0]
print('\nHyperparameters to test:\n', Cs)
results_fairness[diameter][model_clas.alias] = {'ACCs': [], 'EOs': [], 'DIs': [], 'Lambdas': []}
for lamb in Cs:
    print('\nlambda:', lamb)
    # --- train model for fixed hyperparameters
    model_clas = ETC_optimizeCOV_ndb(alias='COV+ACC-ndb-max')
    model_clas.fit(X_clas_train,
                   X_sen_train,
                   y_clas_train,
                   start_thresholds=start_thresholds,
                   mu=100, #100
                   lamb=lamb, #0.01, ..., 0.08, 0.09!, 0.1, ..., 0.44
                   barrier='max',
                   acc_best=acc_best,
                   print_coeff=False)
    # --- evaluate model for fixed hyperparameters
    acc,eo,di,TPRs = evaluate(model_clas)
    # --- store evaluation until model is as unfair as comparison model
    if di <= di_nofairness:
        # the grid is descending, so every skipped value is smaller than lamb
        print('\nHyperparameter {} and smaller were not used'.format(lamb))
        break
    results_d15[model_clas] = {'acc':acc,'eo':eo,'di':di,'TPRs':TPRs}
    results_fairness[diameter][model_clas.alias]['ACCs'].append(acc)
    results_fairness[diameter][model_clas.alias]['EOs'].append(eo)
    results_fairness[diameter][model_clas.alias]['DIs'].append(di)
    results_fairness[diameter][model_clas.alias]['Lambdas'].append(lamb)

#### Method: Optimize DI (ndb., log-barrier)

In [None]:
# Single reference fit of 'DI+ACC-ndb-log' with the log barrier.
# mu: 0.025
# lamb: tried 0.01, 0.02, 0.03, 0.04!, 0.05, ..., 0.44 ('!' marks the value used here)
model_clas = ETC_optimizeDI_ndb(alias='DI+ACC-ndb-log')
fit_options = dict(start_thresholds=start_thresholds,
                   mu=0.025,
                   lamb=0.04,
                   barrier='log',
                   acc_best=acc_best,
                   print_coeff=True)
model_clas.fit(X_clas_train, X_sen_train, y_clas_train, **fit_options)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# --- specify hyperparameters and model class
start_hyper = 0.44
model_clas = ETC_optimizeDI_ndb(alias='DI+ACC-ndb-log')

# --- define constant for the stopping criterion:
#     disparate impact of the comparison model stored in results_nofairness
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
print('disparate impact of approx. {} '\
      'from {} algorithm is used as a stopping criterium.'.format(round(di_nofairness,5),
                                                                        comparison_algo))

# --- test model for different hyperparameters:
#     lambda descends from start_hyper in steps of 0.01; only values > 0 are kept
Cs = [round(start_hyper-i*0.01,2) for i in range(0,50) if round(start_hyper-i*0.01,2)>0]
print('\nHyperparameters to test:\n', Cs)
results_fairness[diameter][model_clas.alias] = {'ACCs': [], 'EOs': [], 'DIs': [], 'Lambdas': []}
for lamb in Cs:
    print('\nlambda:', lamb)
    # --- train model for fixed hyperparameters
    model_clas = ETC_optimizeDI_ndb(alias='DI+ACC-ndb-log')
    model_clas.fit(X_clas_train,
                   X_sen_train,
                   y_clas_train,
                   start_thresholds=start_thresholds,
                   mu=0.025, #0.025
                   lamb=lamb, #0.01, 0.02, 0.03, 0.04!, 0.05, ..., 0.44
                   barrier='log',  # explicit, as in the single 'DI+ACC-ndb-log' fit
                   acc_best=acc_best,
                   print_coeff=False)
    # --- evaluate model for fixed hyperparameters
    acc,eo,di,TPRs = evaluate(model_clas)
    # --- store evaluation until model is as unfair as comparison model
    if di <= di_nofairness:
        # report the current lambda (not the stale `c` left over from earlier
        # sweeps); the grid is descending, so skipped values are smaller
        print('\nHyperparameter {} and smaller were not used'.format(lamb))
        break
    results_d15[model_clas] = {'acc':acc,'eo':eo,'di':di,'TPRs':TPRs}
    results_fairness[diameter][model_clas.alias]['ACCs'].append(acc)
    results_fairness[diameter][model_clas.alias]['EOs'].append(eo)
    results_fairness[diameter][model_clas.alias]['DIs'].append(di)
    results_fairness[diameter][model_clas.alias]['Lambdas'].append(lamb)

#### Method: Optimize DI (ndb., max-barrier)

In [None]:
# Single reference fit of 'DI+ACC-ndb-max' with the max barrier.
# mu: 100
# lamb: tried 0.0, 0.01, 0.02, 0.03!, 0.04, ..., 0.44 ('!' marks the value used here)
model_clas = ETC_optimizeDI_ndb(alias='DI+ACC-ndb-max')
fit_options = dict(start_thresholds=start_thresholds,
                   mu=100,
                   lamb=0.03,
                   barrier='max',
                   acc_best=acc_best,
                   print_coeff=True)
model_clas.fit(X_clas_train, X_sen_train, y_clas_train, **fit_options)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# --- specify hyperparameters and model class
start_hyper = 0.44
model_clas = ETC_optimizeDI_ndb(alias='DI+ACC-ndb-max')

# --- define constant for the stopping criterion:
#     disparate impact of the comparison model stored in results_nofairness
comparison_algo = comparisons[model_clas.alias]
di_nofairness = results_nofairness[diameter][comparison_algo]['di']
print('disparate impact of approx. {} '\
      'from {} algorithm is used as a stopping criterium.'.format(round(di_nofairness,5),
                                                                        comparison_algo))

# --- test model for different hyperparameters:
#     lambda descends from start_hyper in steps of 0.01; values >= 0 are kept
Cs = [round(start_hyper-i*0.01,2) for i in range(0,50) if round(start_hyper-i*0.01,2)>=0]
print('\nHyperparameters to test:\n', Cs)
results_fairness[diameter][model_clas.alias] = {'ACCs': [], 'EOs': [], 'DIs': [], 'Lambdas': []}
for lamb in Cs:
    print('\nlambda:', lamb)
    # --- train model for fixed hyperparameters
    model_clas = ETC_optimizeDI_ndb(alias='DI+ACC-ndb-max')
    model_clas.fit(X_clas_train,
                   X_sen_train,
                   y_clas_train,
                   start_thresholds=start_thresholds,
                   mu=100, #100
                   lamb=lamb, #0.0, 0.01, 0.02, 0.03!, 0.04, ..., 0.44
                   barrier='max',
                   acc_best=acc_best,
                   print_coeff=False)
    # --- evaluate model for fixed hyperparameters
    acc,eo,di,TPRs = evaluate(model_clas)
    # --- store evaluation until model is as unfair as comparison model
    if di <= di_nofairness:
        # the grid is descending, so every skipped value is smaller than lamb
        print('\nHyperparameter {} and smaller were not used'.format(lamb))
        break
    results_d15[model_clas] = {'acc':acc,'eo':eo,'di':di,'TPRs':TPRs}
    results_fairness[diameter][model_clas.alias]['ACCs'].append(acc)
    results_fairness[diameter][model_clas.alias]['EOs'].append(eo)
    results_fairness[diameter][model_clas.alias]['DIs'].append(di)
    results_fairness[diameter][model_clas.alias]['Lambdas'].append(lamb)

#### Method: Optimize EO (ndb., log-barrier)

In [None]:
# Single reference fit of 'EO+ACC-ndb-log' with the log barrier.
# mu: 0.025
# lamb: tried 0.01, ..., 0.05, 0.06!, 0.07, ..., 0.44 ('!' marks the value used here)
model_clas = ETC_optimizeEO_ndb(alias='EO+ACC-ndb-log')
fit_options = dict(start_thresholds=start_thresholds,
                   mu=0.025,
                   lamb=0.06,
                   barrier='log',
                   acc_best=acc_best,
                   print_coeff=True)
model_clas.fit(X_clas_train, X_sen_train, y_clas_train, **fit_options)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# --- specify hyperparameters and model class
start_hyper = 0.44
model_clas = ETC_optimizeEO_ndb(alias='EO+ACC-ndb-log')

# --- define constant for the stopping criterion:
#     equal opportunity of the comparison model stored in results_nofairness
comparison_algo = comparisons[model_clas.alias]
eo_nofairness = results_nofairness[diameter][comparison_algo]['eo']
print('equal opportunity of approx. {} '\
      'from {} algorithm is used as a stopping criterium.'.format(round(eo_nofairness,5),
                                                                        comparison_algo))

# --- test model for different hyperparameters:
#     lambda descends from start_hyper in steps of 0.01; only values > 0 are kept
Cs = [round(start_hyper-i*0.01,2) for i in range(0,50) if round(start_hyper-i*0.01,2)>0]
print('\nHyperparameters to test:\n', Cs)
results_fairness[diameter][model_clas.alias] = {'ACCs': [], 'EOs': [], 'DIs': [], 'Lambdas': []}
for lamb in Cs:
    print('\nlambda:', lamb)
    # --- train model for fixed hyperparameters
    model_clas = ETC_optimizeEO_ndb(alias='EO+ACC-ndb-log')
    model_clas.fit(X_clas_train,
                   X_sen_train,
                   y_clas_train,
                   start_thresholds=start_thresholds,
                   mu=0.025, #0.025
                   lamb=lamb, #0.01, ..., 0.05, 0.06!, 0.07, ..., 0.44
                   barrier='log',
                   acc_best=acc_best,
                   print_coeff=False)
    # --- evaluate model for fixed hyperparameters
    acc,eo,di,TPRs = evaluate(model_clas)
    # --- store evaluation until model is as unfair as comparison model
    #     (for EO the stop condition is eo >= comparison value)
    if eo >= eo_nofairness:
        # report the current lambda (not the stale `c` left over from earlier
        # sweeps); the grid is descending, so skipped values are smaller
        print('\nHyperparameter {} and smaller were not used'.format(lamb))
        break
    results_d15[model_clas] = {'acc':acc,'eo':eo,'di':di,'TPRs':TPRs}
    results_fairness[diameter][model_clas.alias]['ACCs'].append(acc)
    results_fairness[diameter][model_clas.alias]['EOs'].append(eo)
    results_fairness[diameter][model_clas.alias]['DIs'].append(di)
    results_fairness[diameter][model_clas.alias]['Lambdas'].append(lamb)

#### Method: Optimize EO (ndb., max-barrier)

In [None]:
# Single reference fit of 'EO+ACC-ndb-max' with the max barrier.
# mu: 100
# lamb: tried 0.0, 0.01, 0.02, 0.03!, 0.04, ..., 0.44 ('!' marks the value used here)
model_clas = ETC_optimizeEO_ndb(alias='EO+ACC-ndb-max')
fit_options = dict(start_thresholds=start_thresholds,
                   mu=100,
                   lamb=0.03,
                   barrier='max',
                   acc_best=acc_best,
                   print_coeff=True)
model_clas.fit(X_clas_train, X_sen_train, y_clas_train, **fit_options)
acc, eo, di, TPRs = evaluate(model_clas)

In [None]:
# --- specify hyperparameters and model class
start_hyper = 0.44
model_clas = ETC_optimizeEO_ndb(alias='EO+ACC-ndb-max')

# --- define constant for the stopping criterion:
#     equal opportunity of the comparison model stored in results_nofairness
comparison_algo = comparisons[model_clas.alias]
eo_nofairness = results_nofairness[diameter][comparison_algo]['eo']
print('equal opportunity of approx. {} '\
      'from {} algorithm is used as a stopping criterium.'.format(round(eo_nofairness,5),
                                                                        comparison_algo))

# --- test model for different hyperparameters:
#     lambda descends from start_hyper in steps of 0.01; values >= 0 are kept
Cs = [round(start_hyper-i*0.01,2) for i in range(0,50) if round(start_hyper-i*0.01,2)>=0]
print('\nHyperparameters to test:\n', Cs)
results_fairness[diameter][model_clas.alias] = {'ACCs': [], 'EOs': [], 'DIs': [], 'Lambdas': []}
for lamb in Cs:
    print('\nlambda:', lamb)
    # --- train model for fixed hyperparameters
    model_clas = ETC_optimizeEO_ndb(alias='EO+ACC-ndb-max')
    model_clas.fit(X_clas_train,
                   X_sen_train,
                   y_clas_train,
                   start_thresholds=start_thresholds,
                   mu=100, #100
                   lamb=lamb, #0.0, 0.01, 0.02, 0.03!, 0.04, ..., 0.44
                   barrier='max',
                   acc_best=acc_best,
                   print_coeff=False)
    # --- evaluate model for fixed hyperparameters
    acc,eo,di,TPRs = evaluate(model_clas)
    # --- store evaluation until model is as unfair as comparison model
    #     (for EO the stop condition is eo >= comparison value)
    if eo >= eo_nofairness:
        # the grid is descending, so every skipped value is smaller than lamb
        print('\nHyperparameter {} and smaller were not used'.format(lamb))
        break
    results_d15[model_clas] = {'acc':acc,'eo':eo,'di':di,'TPRs':TPRs}
    results_fairness[diameter][model_clas.alias]['ACCs'].append(acc)
    results_fairness[diameter][model_clas.alias]['EOs'].append(eo)
    results_fairness[diameter][model_clas.alias]['DIs'].append(di)
    results_fairness[diameter][model_clas.alias]['Lambdas'].append(lamb)

### Visualize Results

#### Results for the Springer Paper

These plots are identical to the ones in the non-extended Springer version; only the notation of the methods has changed.

In [None]:
# Maps the alias of each fairness-enhancing method to the alias of the
# method it is compared against; the sweep cells look the comparison
# model's scores up through this mapping.
# NOTE(review): sweep cells earlier in the notebook already read
# `comparisons`, so an earlier definition presumably exists - confirm.
comparison_pairs = [
    ('TFPR+COV-db-log',  'TFPR-db'),
    ('TFPR+COV-ndb-log', 'TFPR-ndb'),
    ('TFPR+COV-ndb-max', 'TFPR-ndb'),
    ('ACC+COV-db-log',   'ACC-db'),
    ('ACC+COV-ndb-log',  'ACC-ndb'),
    ('ACC+COV-ndb-max',  'ACC-ndb'),
    ('COV+ACC-ndb-log',  'ACC-ndb'),
    ('COV+ACC-ndb-max',  'ACC-ndb'),
    ('EO+ACC-ndb-log',   'ACC-ndb'),
    ('EO+ACC-ndb-max',   'ACC-ndb'),
    # the one entry paired with a 'db' model although the method is 'ndb'
    # (original note: "this is different to this version")
    ('DI+ACC-ndb-log',   'ACC-db'),
    ('DI+ACC-ndb-max',   'ACC-ndb'),
]
comparisons = dict(comparison_pairs)

In [None]:
# Springer paper: restrict the diameter-5 results to the methods shown there
aliase = ['H', 'TFPR-db', 'TFPR+COV-db-log', 'ACC-db','ACC+COV-db-log', 'DI+ACC-ndb-log']
# keys of results_d5 expose `.alias`; keep the entries whose alias is selected
results_d5_extract = {model: scores
                      for model, scores in results_d5.items()
                      if model.alias in aliase}

# graphics_bars hands back four objects, kept under the names used elsewhere
df1, df2, df3, fig_d5 = graphics_bars(results_d5_extract, save_figs_d=5)

In [None]:
# Springer paper: restrict the diameter-10 results to the methods shown there
aliase = ['H', 'TFPR-db', 'TFPR+COV-db-log', 'ACC-db', 'ACC+COV-db-log', 'DI+ACC-ndb-log']
# keys of results_d10 expose `.alias`; keep the entries whose alias is selected
results_d10_extract = {model: scores
                       for model, scores in results_d10.items()
                       if model.alias in aliase}

# graphics_bars hands back four objects, kept under the names used elsewhere
df1, df2, df3, fig_d10 = graphics_bars(results_d10_extract, save_figs_d=10)

In [None]:
# Springer paper: restrict the diameter-15 results to the methods shown there
aliase = ['H', 'TFPR-db', 'TFPR+COV-db-log', 'ACC-db', 'ACC+COV-db-log', 'DI+ACC-ndb-log']
# keys of results_d15 expose `.alias`; keep the entries whose alias is selected
results_d15_extract = {model: scores
                       for model, scores in results_d15.items()
                       if model.alias in aliase}

# graphics_bars hands back four objects, kept under the names used elsewhere
df1, df2, df3, fig_d15 = graphics_bars(results_d15_extract, save_figs_d=15)

In [None]:
# Springer paper: restrict the hyperparameter-sweep results to the methods shown there
aliase_fairness = ['TFPR+COV-db-log','ACC+COV-db-log', 'DI+ACC-ndb-log']
# One sub-dict per diameter. The candidate keys are read from the diameter-5
# results only, so each selected method must exist for 10 and 15 as well.
results_fairness_extract = {
    diam: {method: results_fairness[diam][method]
           for method in results_fairness[5].keys()
           if method in aliase_fairness}
    for diam in (5, 10, 15)
}

In [None]:
# Inputs: extracted sweep results, the no-fairness results, and the
# method -> comparison-alias mapping defined above.
graphics_scatter(
    results_fairness_extract,
    results_nofairness,
    comparisons,
    horizontal=False,
    save_figs=True,
)

In [None]:
# Inputs: extracted sweep results, the no-fairness results, and the
# method -> comparison-alias mapping defined above (EO switched off).
graphics_lines(
    results_fairness_extract,
    results_nofairness,
    comparisons,
    with_eo=False,
    save_figs=True,
)

#### Results for the PeerJ Journal Paper

In [None]:
# Mapping: alias of a fairness-enhancing method -> alias it is compared against.
comparisons = dict([
    ('TFPR+COV-db-log',  'TFPR-db'),
    ('TFPR+COV-ndb-log', 'TFPR-ndb'),
    ('TFPR+COV-ndb-max', 'TFPR-ndb'),
    ('ACC+COV-db-log',   'ACC-db'),
    ('ACC+COV-ndb-log',  'ACC-ndb'),
    ('ACC+COV-ndb-max',  'ACC-ndb'),
    ('COV+ACC-ndb-log',  'ACC-ndb'),
    ('COV+ACC-ndb-max',  'ACC-ndb'),
    ('EO+ACC-ndb-log',   'ACC-ndb'),
    ('EO+ACC-ndb-max',   'ACC-ndb'),
    # differs from the Springer mapping, which uses 'ACC-db' for this method
    ('DI+ACC-ndb-log',   'ACC-ndb'),
    ('DI+ACC-ndb-max',   'ACC-ndb'),
])

##### TFPR (with titles)

In [None]:
# TFPR vs. TFPR+COV (db. and ndb. optimization, both barrier methods), diameter-5 results
aliase = ['H', 'TFPR-db', 'TFPR+COV-db-log', 'TFPR-ndb', 'TFPR+COV-ndb-log', 'TFPR+COV-ndb-max']
# keys of results_d5 expose `.alias`; keep the entries whose alias is selected
results_d5_extract = {model: scores
                      for model, scores in results_d5.items()
                      if model.alias in aliase}

# graphics_bars hands back four objects, kept under the names used elsewhere
df1, df2, df3, fig_d5 = graphics_bars(
    results_d5_extract,
    rotate=True,
    title=True,
    save_figs_d=5,
)

In [None]:
# TFPR vs. TFPR+COV (db. and ndb. optimization, both barrier methods), diameter-10 results
aliase = ['H', 'TFPR-db', 'TFPR+COV-db-log', 'TFPR-ndb', 'TFPR+COV-ndb-log', 'TFPR+COV-ndb-max']
# keys of results_d10 expose `.alias`; keep the entries whose alias is selected
results_d10_extract = {model: scores
                       for model, scores in results_d10.items()
                       if model.alias in aliase}

# graphics_bars hands back four objects, kept under the names used elsewhere
df1, df2, df3, fig_d10 = graphics_bars(
    results_d10_extract,
    rotate=True,
    title=True,
    save_figs_d=10,
)

In [None]:
# TFPR vs. TFPR+COV (db. and ndb. optimization, both barrier methods), diameter-15 results
aliase = ['H', 'TFPR-db', 'TFPR+COV-db-log', 'TFPR-ndb', 'TFPR+COV-ndb-log', 'TFPR+COV-ndb-max']
# keys of results_d15 expose `.alias`; keep the entries whose alias is selected
results_d15_extract = {model: scores
                       for model, scores in results_d15.items()
                       if model.alias in aliase}

# graphics_bars hands back four objects, kept under the names used elsewhere
df1, df2, df3, fig_d15 = graphics_bars(
    results_d15_extract,
    rotate=True,
    title=True,
    save_figs_d=15,
)

In [None]:
# TFPR+COV for db. and ndb. optimization and both barrier methods
aliase = ['TFPR+COV-db-log', 'TFPR+COV-ndb-log', 'TFPR+COV-ndb-max']
# One sub-dict per diameter. The candidate keys are read from the diameter-5
# results only, so each selected method must exist for 10 and 15 as well.
results_fairness_extract = {
    diam: {method: results_fairness[diam][method]
           for method in results_fairness[5].keys()
           if method in aliase}
    for diam in (5, 10, 15)
}

In [None]:
# Inputs: extracted sweep results, the no-fairness results, and the
# method -> comparison-alias mapping (PeerJ variant); titles switched on.
graphics_scatter(
    results_fairness_extract,
    results_nofairness,
    comparisons,
    title=True,
    horizontal=True,
    save_figs=True,
)

##### ACC (without titles)

In [None]:
# ACC vs. ACC+COV (db. and ndb. optimization, both barrier methods), diameter-5 results
aliase = ['H', 'ACC-db','ACC+COV-db-log', 'ACC-ndb','ACC+COV-ndb-log', 'ACC+COV-ndb-max']
# keys of results_d5 expose `.alias`; keep the entries whose alias is selected
results_d5_extract = {model: scores
                      for model, scores in results_d5.items()
                      if model.alias in aliase}

# graphics_bars hands back four objects, kept under the names used elsewhere
df1, df2, df3, fig_d5 = graphics_bars(
    results_d5_extract,
    rotate=True,
    title=False,
    save_figs_d=5,
)

In [None]:
# ACC vs. ACC+COV (db. and ndb. optimization, both barrier methods), diameter-10 results
aliase = ['H', 'ACC-db','ACC+COV-db-log', 'ACC-ndb','ACC+COV-ndb-log', 'ACC+COV-ndb-max']
# keys of results_d10 expose `.alias`; keep the entries whose alias is selected
results_d10_extract = {model: scores
                       for model, scores in results_d10.items()
                       if model.alias in aliase}

# graphics_bars hands back four objects, kept under the names used elsewhere
df1, df2, df3, fig_d10 = graphics_bars(
    results_d10_extract,
    rotate=True,
    title=False,
    save_figs_d=10,
)

In [None]:
# ACC vs. ACC+COV (db. and ndb. optimization, both barrier methods), diameter-15 results
aliase = ['H', 'ACC-db','ACC+COV-db-log', 'ACC-ndb','ACC+COV-ndb-log', 'ACC+COV-ndb-max']
# keys of results_d15 expose `.alias`; keep the entries whose alias is selected
results_d15_extract = {model: scores
                       for model, scores in results_d15.items()
                       if model.alias in aliase}

# graphics_bars hands back four objects, kept under the names used elsewhere
df1, df2, df3, fig_d15 = graphics_bars(
    results_d15_extract,
    rotate=True,
    title=False,
    save_figs_d=15,
)

In [None]:
# ACC+COV for db. and ndb. optimization and both barrier methods
aliase = ['ACC+COV-db-log', 'ACC+COV-ndb-log', 'ACC+COV-ndb-max']
# One sub-dict per diameter. The candidate keys are read from the diameter-5
# results only, so each selected method must exist for 10 and 15 as well.
results_fairness_extract = {
    diam: {method: results_fairness[diam][method]
           for method in results_fairness[5].keys()
           if method in aliase}
    for diam in (5, 10, 15)
}

In [None]:
# Inputs: extracted sweep results, the no-fairness results, and the
# method -> comparison-alias mapping (PeerJ variant); titles switched off.
graphics_scatter(
    results_fairness_extract,
    results_nofairness,
    comparisons,
    title=False,
    horizontal=True,
    save_figs=True,
)

##### COV (without titles)

In [None]:
# COV+ACC with both barrier methods, diameter-5 results
aliase = ['H', 'ACC-ndb', 'COV+ACC-ndb-log', 'COV+ACC-ndb-max']
# keys of results_d5 expose `.alias`; keep the entries whose alias is selected
results_d5_extract = {model: scores
                      for model, scores in results_d5.items()
                      if model.alias in aliase}

# graphics_bars hands back four objects, kept under the names used elsewhere
df1, df2, df3, fig_d5 = graphics_bars(
    results_d5_extract,
    rotate=True,
    title=False,
    save_figs_d=5,
)

In [None]:
# COV+ACC with both barrier methods, diameter-10 results
aliase = ['H', 'ACC-ndb', 'COV+ACC-ndb-log', 'COV+ACC-ndb-max']
# keys of results_d10 expose `.alias`; keep the entries whose alias is selected
results_d10_extract = {model: scores
                       for model, scores in results_d10.items()
                       if model.alias in aliase}

# graphics_bars hands back four objects, kept under the names used elsewhere
df1, df2, df3, fig_d10 = graphics_bars(
    results_d10_extract,
    rotate=True,
    title=False,
    save_figs_d=10,
)

In [None]:
# COV+ACC with both barrier methods, diameter-15 results
aliase = ['H', 'ACC-ndb', 'COV+ACC-ndb-log', 'COV+ACC-ndb-max']
# keys of results_d15 expose `.alias`; keep the entries whose alias is selected
results_d15_extract = {model: scores
                       for model, scores in results_d15.items()
                       if model.alias in aliase}

# graphics_bars hands back four objects, kept under the names used elsewhere
df1, df2, df3, fig_d15 = graphics_bars(
    results_d15_extract,
    rotate=True,
    title=False,
    save_figs_d=15,
)

In [None]:
# COV+ACC with both barrier methods
# NOTE(review): 'ACC-ndb' only has an effect if it is a key of results_fairness[5] -- confirm
aliase = ['ACC-ndb', 'COV+ACC-ndb-log', 'COV+ACC-ndb-max']
# One sub-dict per diameter. The candidate keys are read from the diameter-5
# results only, so each selected method must exist for 10 and 15 as well.
results_fairness_extract = {
    diam: {method: results_fairness[diam][method]
           for method in results_fairness[5].keys()
           if method in aliase}
    for diam in (5, 10, 15)
}

In [None]:
# Inputs: extracted sweep results, the no-fairness results, and the
# method -> comparison-alias mapping (PeerJ variant); titles switched off.
graphics_scatter(
    results_fairness_extract,
    results_nofairness,
    comparisons,
    title=False,
    horizontal=True,
    save_figs=True,
)

##### DI (without titles)

In [None]:
# DI+ACC with both barrier methods, diameter-5 results (no titles)
aliase = ['H', 'ACC-ndb', 'DI+ACC-ndb-log', 'DI+ACC-ndb-max']
# keys of results_d5 expose `.alias`; keep the entries whose alias is selected
results_d5_extract = {model: scores
                      for model, scores in results_d5.items()
                      if model.alias in aliase}

# graphics_bars hands back four objects, kept under the names used elsewhere
df1, df2, df3, fig_d5 = graphics_bars(
    results_d5_extract,
    rotate=True,
    title=False,
    save_figs_d=5,
)

In [None]:
# DI+ACC with both barrier methods, diameter-10 results (no titles)
aliase = ['H', 'ACC-ndb', 'DI+ACC-ndb-log', 'DI+ACC-ndb-max']
# keys of results_d10 expose `.alias`; keep the entries whose alias is selected
results_d10_extract = {model: scores
                       for model, scores in results_d10.items()
                       if model.alias in aliase}

# graphics_bars hands back four objects, kept under the names used elsewhere
df1, df2, df3, fig_d10 = graphics_bars(
    results_d10_extract,
    rotate=True,
    title=False,
    save_figs_d=10,
)

In [None]:
# DI+ACC with both barrier methods, diameter-15 results (no titles)
aliase = ['H', 'ACC-ndb', 'DI+ACC-ndb-log', 'DI+ACC-ndb-max']
# keys of results_d15 expose `.alias`; keep the entries whose alias is selected
results_d15_extract = {model: scores
                       for model, scores in results_d15.items()
                       if model.alias in aliase}

# graphics_bars hands back four objects, kept under the names used elsewhere
df1, df2, df3, fig_d15 = graphics_bars(
    results_d15_extract,
    rotate=True,
    title=False,
    save_figs_d=15,
)

In [None]:
# DI+ACC with both barrier methods
# NOTE(review): 'H' and 'ACC-ndb' only have an effect if they are keys of results_fairness[5] -- confirm
aliase = ['H', 'ACC-ndb', 'DI+ACC-ndb-log', 'DI+ACC-ndb-max']
# One sub-dict per diameter. The candidate keys are read from the diameter-5
# results only, so each selected method must exist for 10 and 15 as well.
results_fairness_extract = {
    diam: {method: results_fairness[diam][method]
           for method in results_fairness[5].keys()
           if method in aliase}
    for diam in (5, 10, 15)
}

In [None]:
# Inputs: extracted sweep results, the no-fairness results, and the
# method -> comparison-alias mapping (PeerJ variant); titles switched off.
graphics_scatter(
    results_fairness_extract,
    results_nofairness,
    comparisons,
    title=False,
    horizontal=True,
    save_figs=True,
)

##### DI (with titles)

In [None]:
# DI+ACC with both barrier methods, diameter-5 results (with titles)
aliase = ['H', 'ACC-ndb', 'DI+ACC-ndb-log', 'DI+ACC-ndb-max']
# keys of results_d5 expose `.alias`; keep the entries whose alias is selected
results_d5_extract = {model: scores
                      for model, scores in results_d5.items()
                      if model.alias in aliase}

# graphics_bars hands back four objects, kept under the names used elsewhere
df1, df2, df3, fig_d5 = graphics_bars(
    results_d5_extract,
    rotate=True,
    title=True,
    save_figs_d=5,
)

In [None]:
# DI+ACC with both barrier methods, diameter-10 results (with titles)
aliase = ['H', 'ACC-ndb', 'DI+ACC-ndb-log', 'DI+ACC-ndb-max']
# keys of results_d10 expose `.alias`; keep the entries whose alias is selected
results_d10_extract = {model: scores
                       for model, scores in results_d10.items()
                       if model.alias in aliase}

# graphics_bars hands back four objects, kept under the names used elsewhere
df1, df2, df3, fig_d10 = graphics_bars(
    results_d10_extract,
    rotate=True,
    title=True,
    save_figs_d=10,
)

In [None]:
# DI+ACC with both barrier methods, diameter-15 results (with titles)
aliase = ['H', 'ACC-ndb', 'DI+ACC-ndb-log', 'DI+ACC-ndb-max']
# keys of results_d15 expose `.alias`; keep the entries whose alias is selected
results_d15_extract = {model: scores
                       for model, scores in results_d15.items()
                       if model.alias in aliase}

# graphics_bars hands back four objects, kept under the names used elsewhere
df1, df2, df3, fig_d15 = graphics_bars(
    results_d15_extract,
    rotate=True,
    title=True,
    save_figs_d=15,
)

##### Fairness-Accuracy tradeoff

In [None]:
# best method per subcategory (TFPR+COV, ACC+COV, DI+ACC)
aliase_fairness = ['TFPR+COV-ndb-log', 'ACC+COV-ndb-log', 'DI+ACC-ndb-max']
# One sub-dict per diameter. The candidate keys are read from the diameter-5
# results only, so each selected method must exist for 10 and 15 as well.
results_fairness_extract = {
    diam: {method: results_fairness[diam][method]
           for method in results_fairness[5].keys()
           if method in aliase_fairness}
    for diam in (5, 10, 15)
}

In [None]:
# Inputs: extracted sweep results, the no-fairness results, and the
# method -> comparison-alias mapping (PeerJ variant); titles switched on.
graphics_scatter(
    results_fairness_extract,
    results_nofairness,
    comparisons,
    title=True,
    horizontal=True,
    save_figs=True,
)

In [None]:
# best method per subcategory (COV+ACC, DI+ACC)
aliase_fairness = ['COV+ACC-ndb-log', 'DI+ACC-ndb-max']
# One sub-dict per diameter. The candidate keys are read from the diameter-5
# results only, so each selected method must exist for 10 and 15 as well.
results_fairness_extract = {
    diam: {method: results_fairness[diam][method]
           for method in results_fairness[5].keys()
           if method in aliase_fairness}
    for diam in (5, 10, 15)
}

In [None]:
# Inputs: extracted sweep results, the no-fairness results, and the
# method -> comparison-alias mapping (PeerJ variant); titles switched off.
graphics_scatter(
    results_fairness_extract,
    results_nofairness,
    comparisons,
    title=False,
    horizontal=True,
    save_figs=True,
)

##### Hyperparameter dependence

In [None]:
# best method per subcategory (TFPR+COV, ACC+COV, COV+ACC, DI+ACC)
aliase_fairness = ['TFPR+COV-ndb-log', 'ACC+COV-ndb-log', 'COV+ACC-ndb-log', 'DI+ACC-ndb-max']
# One sub-dict per diameter. The candidate keys are read from the diameter-5
# results only, so each selected method must exist for 10 and 15 as well.
results_fairness_extract = {
    diam: {method: results_fairness[diam][method]
           for method in results_fairness[5].keys()
           if method in aliase_fairness}
    for diam in (5, 10, 15)
}

In [None]:
# Inputs: extracted sweep results, the no-fairness results, and the
# method -> comparison-alias mapping; columns='diameters', EO included.
graphics_lines(
    results_fairness_extract,
    results_nofairness,
    comparisons,
    columns='diameters',
    with_eo=True,
    save_figs=True,
)

In [None]:
# best method per subcategory (TFPR+COV, DI+ACC)
aliase_fairness = ['TFPR+COV-ndb-log', 'DI+ACC-ndb-max']
# One sub-dict per diameter. The candidate keys are read from the diameter-5
# results only, so each selected method must exist for 10 and 15 as well.
results_fairness_extract = {
    diam: {method: results_fairness[diam][method]
           for method in results_fairness[5].keys()
           if method in aliase_fairness}
    for diam in (5, 10, 15)
}

In [None]:
# best method per subcategory
# author's note: requires plt.figure(figsize=(20,12.5))
# Inputs: extracted sweep results, the no-fairness results, and the
# method -> comparison-alias mapping; columns='diameters', EO included.
graphics_lines(
    results_fairness_extract,
    results_nofairness,
    comparisons,
    columns='diameters',
    with_eo=True,
    save_figs=True,
)

#### Quick Check

In [None]:
# Scratch selection: uncomment the aliases to inspect. With everything
# commented out the list is empty and the extracted dicts stay empty.
aliase_fairness = [
    #'TFPR+COV-db-log',
    #'TFPR+COV-ndb-log',
    #'TFPR+COV-ndb-max',
    #'ACC+COV-db-log',
    #'ACC+COV-ndb-log',
    #'ACC+COV-ndb-max',
    #'COV+ACC-ndb-log',
    #'COV+ACC-ndb-max',
    #'DI+ACC-ndb-log',
    #'DI+ACC-ndb-max',
    #'EO+ACC-ndb-log',
    #'EO+ACC-ndb-max'
]
# One sub-dict per diameter. The candidate keys are read from the diameter-5
# results only, so each selected method must exist for 10 and 15 as well.
results_fairness_extract = {
    diam: {method: results_fairness[diam][method]
           for method in results_fairness[5].keys()
           if method in aliase_fairness}
    for diam in (5, 10, 15)
}

# columns='methods' here (the cells above pass 'diameters'); figures are not saved
graphics_lines(
    results_fairness_extract,
    results_nofairness,
    comparisons,
    columns='methods',
    with_eo=True,
    save_figs=False,
)