In [9]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
# efficacy metrics from sklearn
from sklearn import metrics
from sklearn.preprocessing import StandardScaler

from holisticai.bias.metrics import *
from holisticai.bias.plots import group_pie_plot
from holisticai.bias.mitigation import Reweighing, LearningFairRepresentation
from holisticai.pipeline import Pipeline
from holisticai.bias.mitigation import MLDebiaser, EqualizedOdds
from collections import defaultdict
from holisticai.bias.mitigation import GridSearchReduction, ExponentiatedGradientReduction

# Settings
np.random.seed(0)
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Let's load the dataset
data = pd.read_csv('./../data/synth_data_for_training.csv')
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12645 entries, 0 to 12644
Columns: 316 entries, adres_aantal_brp_adres to checked
dtypes: int64(316)
memory usage: 30.5 MB


In [3]:
data.columns

Index(['adres_aantal_brp_adres', 'adres_aantal_verschillende_wijken',
       'adres_aantal_verzendadres', 'adres_aantal_woonadres_handmatig',
       'adres_dagen_op_adres', 'adres_recentst_onderdeel_rdam',
       'adres_recentste_buurt_groot_ijsselmonde',
       'adres_recentste_buurt_nieuwe_westen', 'adres_recentste_buurt_other',
       'adres_recentste_buurt_oude_noorden',
       ...
       'typering_hist_aantal', 'typering_hist_inburgeringsbehoeftig',
       'typering_hist_ind', 'typering_hist_sector_zorg', 'typering_ind',
       'typering_indicatie_geheime_gegevens', 'typering_other',
       'typering_transport__logistiek___tuinbouw',
       'typering_zorg__schoonmaak___welzijn', 'checked'],
      dtype='object', length=316)

In [4]:

protected_variables = ["persoon_geslacht_vrouw"]
output_variable = ["checked"]

# Simple preprocessing
X = pd.get_dummies(data.drop(protected_variables + output_variable, axis=1))
X = X.astype(np.float32)
y = data[output_variable]
group = ["persoon_geslacht_vrouw"]
group_a = data[group] == 1
group_b = data[group] == 0
data_ = [X, y, group_a, group_b]

# Train test split
dataset = train_test_split(*data_, test_size=0.25, shuffle=True)
train_data = dataset[::2]
test_data = dataset[1::2]

In [5]:
# dictionnary of metrics
metrics_dict={
        "Accuracy": metrics.accuracy_score,
        "Balanced accuracy": metrics.balanced_accuracy_score,
        "Precision": metrics.precision_score,
        "Recall": metrics.recall_score,
        "F1-Score": metrics.f1_score}

# efficacy metrics dataframe helper tool
def metrics_dataframe(y_pred, y_true, metrics_dict=metrics_dict):
    metric_list = [[pf, fn(y_true, y_pred)] for pf, fn in metrics_dict.items()]
    return pd.DataFrame(metric_list, columns=["Metric", "Value"]).set_index("Metric")

In [6]:
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('classifier', GradientBoostingClassifier()),
    ])

X, y, group_a, group_b = train_data
pipeline.fit(X, y)

X, y, group_a, group_b = test_data
y_pred = pipeline.predict(X)
df_baseline = classification_bias_metrics(group_a,
                            group_b,
                            y_pred,
                            y, metric_type='both')

df_eff_baseline = metrics_dataframe(y, y_pred)

In [7]:
def fit_and_evaluate_pipeline(pipeline, data_cls=None):

    X, y, group_a, group_b = train_data
    fit_params = {
        'bm__group_a': group_a,
        'bm__group_b': group_b
    }
    pipeline.fit(X, y, **fit_params)

    X, y, group_a, group_b = test_data
    predict_params = {
        'bm__group_a': group_a,
        'bm__group_b': group_b,
    }
    y_pred = pipeline.predict(X, **predict_params)

    df = classification_bias_metrics(group_a,
                                group_b,
                                y_pred,
                                y, metric_type='both')
    df_eff = metrics_dataframe(y, y_pred)
    return df,df_eff

def format_result_colum(name,df):
    return df.rename(columns={'Value':name}).iloc[:,0]

def show_bias_result_table(configurations, df_baseline):
    table = pd.concat([df_baseline.iloc[:,0]] + [format_result_colum(name,config['result']['bias'])
            for name,config in configurations.items()] + [df_baseline.iloc[:,1]],axis=1)
    return table.rename(columns={'Value':'Baseline'})

def show_efficacy_result_table(configurations, df_baseline):
    table = pd.concat([df_baseline.iloc[:,0]] + [format_result_colum(name,config['result']['efficacy'])
            for name,config in configurations.items()],axis=1)
    return table.rename(columns={'Value':'Baseline'})

In [197]:
configurations = defaultdict(dict)

configurations['MLDebiaser']['pipeline'] = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('classifier', GradientBoostingClassifier()),
    ('bm_postprocessing', MLDebiaser(sgd_steps=10_000,
                       full_gradient_epochs=500,
                       max_iter=20, verbose=True)),
    ])

for config_name,config in configurations.items():
    bias_metrics,eff_metrics = fit_and_evaluate_pipeline(config['pipeline'])
    config['result'] = {'bias':bias_metrics, 'efficacy':eff_metrics}

[elapsed time: 00:00:01 | iter:2/2]


In [198]:
show_bias_result_table(configurations, df_baseline)


Unnamed: 0_level_0,Baseline,MLDebiaser,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Statistical Parity,-0.01467,-0.01216,0
Disparate Impact,0.745035,0.853809,1
Four Fifths Rule,0.745035,0.853809,1
Cohen D,-0.067166,-0.045579,0
2SD Rule,-1.887248,-1.281063,0
Equality of Opportunity Difference,-0.058704,-0.030139,0
False Positive Rate Difference,-0.00625,-0.005404,0
Average Odds Difference,-0.032477,-0.017772,0
Accuracy Difference,0.003577,0.004433,0


In [199]:
configurations['Reweighing']['pipeline'] = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('bm_preprocessing', Reweighing()),
    ('classifier', GradientBoostingClassifier()),
    ])

from holisticai.bias.mitigation import LearningFairRepresentation
configurations['Learning Fair Representation']['pipeline'] = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('bm_preprocessing', LearningFairRepresentation(k=10, Ax=0.1, Ay=1.0, Az=2.0, verbose=1)),
    ('classifier', GradientBoostingClassifier()),
    ])

In [200]:
for config_name,config in configurations.items():
    bias_metrics,eff_metrics = fit_and_evaluate_pipeline(config['pipeline'])
    config['result'] = {'bias':bias_metrics, 'efficacy':eff_metrics}

[elapsed time: 00:00:01 | iter:2/2]


Optimization Progress: 6302it [17:15,  6.30it/s, loss: 0.775 L_x: 2.472 L_y: 0.523 L_z: 0.003]                          

In [None]:
show_bias_result_table(configurations, df_baseline)

Unnamed: 0_level_0,Baseline,Reweighing,Learning Fair Representation,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Statistical Parity,-0.01467,-0.011442,-0.000525,0
Disparate Impact,0.745035,0.805366,0.895154,1
Four Fifths Rule,0.745035,0.805366,0.895154,1
Cohen D,-0.067166,-0.051029,-0.007634,0
2SD Rule,-1.887248,-1.434174,-0.214628,0
Equality of Opportunity Difference,-0.058704,-0.031939,0.000562,0
False Positive Rate Difference,-0.00625,-0.00554,-0.000638,0
Average Odds Difference,-0.032477,-0.018739,-3.8e-05,0
Accuracy Difference,0.003577,0.005525,0.007687,0


In [None]:
configurations['Equalized Odds']['pipeline'] = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('classifier', GradientBoostingClassifier()),
    ('bm_postprocessing', EqualizedOdds()),
    ])

from holisticai.bias.mitigation import CalibratedEqualizedOdds
configurations['Calibrated Equalized Odds']['pipeline'] = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('classifier', GradientBoostingClassifier()),
    ('bm_postprocessing', CalibratedEqualizedOdds()),
    ])

from holisticai.bias.mitigation import RejectOptionClassification
configurations['Reject Option Classification']['pipeline'] = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('classifier', GradientBoostingClassifier()),
    ('bm_postprocessing', RejectOptionClassification(metric_name="Statistical parity difference", verbose=1)),
    ])

In [None]:
for config_name,config in configurations.items():
    bias_metrics,eff_metrics = fit_and_evaluate_pipeline(config['pipeline'])
    config['result'] = {'bias':bias_metrics, 'efficacy':eff_metrics}

Optimization Progress: 6302it [13:19,  7.88it/s, loss: 0.668 L_x: 2.483 L_y: 0.415 L_z: 0.002]
Optimization Progress: 6302it [15:42,  6.69it/s, loss: 0.679 L_x: 2.493 L_y: 0.425 L_z: 0.002]


In [None]:
show_bias_result_table(configurations, df_baseline)


Unnamed: 0_level_0,Baseline,Reweighing,Learning Fair Representation,Equalized Odds,Calibrated Equalized Odds,Reject Option Classification,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Statistical Parity,-0.01467,-0.011442,0.003314,-0.005274,-0.010292,-0.012818,0
Disparate Impact,0.745035,0.805366,1.662428,0.891875,0.80639,0.92266,1
Four Fifths Rule,0.745035,0.805366,0.60153,0.891875,0.80639,0.92266,1
Cohen D,-0.067166,-0.051029,0.040812,-0.025135,-0.048126,-0.035022,0
2SD Rule,-1.887248,-1.434174,1.147153,-0.706588,-1.352623,-0.984454,0
Equality of Opportunity Difference,-0.058704,-0.031939,0.008097,0.005623,-0.023617,-0.048583,0
False Positive Rate Difference,-0.00625,-0.00554,0.002895,-0.003439,-0.00555,-0.002383,0
Average Odds Difference,-0.032477,-0.018739,0.005496,0.001092,-0.014583,-0.025483,0
Accuracy Difference,0.003577,0.005525,0.005186,0.00794,0.006704,-0.002512,0


In [None]:
show_efficacy_result_table(configurations, df_eff_baseline)


Unnamed: 0_level_0,Baseline,Reweighing,Learning Fair Representation,Equalized Odds,Calibrated Equalized Odds,Reject Option Classification
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Accuracy,0.934851,0.937065,0.894371,0.932005,0.93327,0.917457
Balanced accuracy,0.909116,0.910583,0.615727,0.908956,0.90873,0.774837
Precision,0.428135,0.452599,0.021407,0.394495,0.409786,0.87156
Recall,0.880503,0.880952,0.333333,0.883562,0.881579,0.565476
F1-Score,0.576132,0.59798,0.04023,0.545455,0.559499,0.685921


In [None]:

configurations = defaultdict(dict)
model = GradientBoostingClassifier()
inprocessing_model = GridSearchReduction(constraints="DemographicParity", grid_size=20, verbose=1).transform_estimator(model)

configurations['GridSearch Reduction']['pipeline'] = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('bm_inprocessing', inprocessing_model),
    ])

from holisticai.bias.mitigation import ExponentiatedGradientReduction

model = GradientBoostingClassifier()
inprocessing_model = ExponentiatedGradientReduction(constraints="DemographicParity", verbose=1).transform_estimator(model)

configurations['ExponentiatedGradient Reduction']['pipeline'] = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('bm_inprocessing', inprocessing_model),
    ])

In [None]:
for config_name,config in configurations.items():
    bias_metrics,eff_metrics = fit_and_evaluate_pipeline(config['pipeline'])
    config['result'] = {'bias':bias_metrics, 'efficacy':eff_metrics}

steps: 7	Best gap:0.4759

In [None]:
show_bias_result_table(configurations, df_baseline)


Unnamed: 0_level_0,Baseline,GridSearch Reduction,ExponentiatedGradient Reduction,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Statistical Parity,-0.01467,-0.01467,-0.012765,0
Disparate Impact,0.745035,0.745035,0.775706,1
Four Fifths Rule,0.745035,0.745035,0.775706,1
Cohen D,-0.067166,-0.067166,-0.058263,0
2SD Rule,-1.887248,-1.887248,-1.637306,0
Equality of Opportunity Difference,-0.058704,-0.058704,-0.045884,0
False Positive Rate Difference,-0.00625,-0.00625,-0.00555,0
Average Odds Difference,-0.032477,-0.032477,-0.025717,0
Accuracy Difference,0.003577,0.003577,0.004231,0


In [38]:

configurations = defaultdict(dict)
model = GradientBoostingClassifier()

inprocessing_model = ExponentiatedGradientReduction(constraints="EqualizedOdds", max_iter=50,  verbose=1).transform_estimator(model)

# configurations['Ultimaterew ']['pipeline'] = Pipeline(steps=[
#     ('scaler', StandardScaler()),
#     ('bm_preprocessing', Reweighing()),
#     ('scaler2', StandardScaler()),
#     ('bm_inprocessing', inprocessing_model),
#     ('bm_postprocessing', MLDebiaser(sgd_steps=10_000,
#                        full_gradient_epochs=500,
#                        max_iter=20, verbose=True)),
#     ])

configurations['Ultimate lfr']['pipeline'] = Pipeline(steps=[
    ('scaler', StandardScaler()),
    # ('bm_preprocessing', LearningFairRepresentation(k=10, Ax=0.1, Ay=1.0, Az=2.0, verbose=1)),
    ('bm_inprocessing', inprocessing_model)
    ])

In [39]:
for config_name,config in configurations.items():
    bias_metrics,eff_metrics = fit_and_evaluate_pipeline(config['pipeline'])
    config['result'] = {'bias':bias_metrics, 'efficacy':eff_metrics}

steps: 5	Best gap:0.0022

In [40]:
show_bias_result_table(configurations, df_baseline)


Unnamed: 0_level_0,Baseline,Ultimate lfr,Reference
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Statistical Parity,-0.01467,-0.014641,0
Disparate Impact,0.745035,0.750949,1
Four Fifths Rule,0.745035,0.750949,1
Cohen D,-0.067166,-0.06625,0
2SD Rule,-1.887248,-1.861527,0
Equality of Opportunity Difference,-0.058704,-0.074561,0
False Positive Rate Difference,-0.00625,-0.00416,0
Average Odds Difference,-0.032477,-0.039361,0
Accuracy Difference,0.003577,-0.000118,0


In [42]:
pipeline = configurations['Ultimate lfr']['pipeline']
import onnx
from skl2onnx.common.data_types import FloatTensorType
from sklearn.ensemble import GradientBoostingClassifier
from skl2onnx import convert_sklearn


# Let's convert the model to ONNX
onnx_model = convert_sklearn(
    pipeline, initial_types=[('X', FloatTensorType((None, X.shape[1])))],
    target_opset=12)

# Let's save the model


onnx.save(onnx_model, "model/exponentiated_gradient_model_1.onnx")


MissingShapeCalculator: Unable to find a shape calculator for type '<class 'holisticai.pipeline.handlers._estimator.WEstimator'>'.
It usually means the pipeline being converted contains a
transformer or a predictor with no corresponding converter
implemented in sklearn-onnx. If the converted is implemented
in another library, you need to register
the converted so that it can be used by sklearn-onnx (function
update_registered_converter). If the model is not yet covered
by sklearn-onnx, you may raise an issue to
https://github.com/onnx/sklearn-onnx/issues
to get the converter implemented or even contribute to the
project. If the model is a custom model, a new converter must
be implemented. Examples can be found in the gallery.


In [None]:
from collections import defaultdict
configurations = defaultdict(dict)

from holisticai.bias.mitigation import GridSearchReduction

model = GradientBoostingClassifier()
inprocessing_model = GridSearchReduction(constraints="DemographicParity", grid_size=20, verbose=1).transform_estimator(model)

configurations['GridSearch Reduction']['pipeline'] = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('bm_inprocessing', inprocessing_model),
    ])

from holisticai.bias.mitigation import ExponentiatedGradientReduction

model = GradientBoostingClassifier()
inprocessing_model = ExponentiatedGradientReduction(constraints="DemographicParity", verbose=1).transform_estimator(model)

configurations['ExponentiatedGradient Reduction']['pipeline'] = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('bm_inprocessing', inprocessing_model),
    ])