In [21]:
import pandas as pd
import numpy as np

from fairlearn.metrics import MetricFrame
from fairlearn.metrics import count, \
                              false_positive_rate, \
                              selection_rate, equalized_odds_ratio
from fairlearn.preprocessing import CorrelationRemover
from fairlearn.reductions import ExponentiatedGradient, EqualizedOdds

 
                              
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import recall_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from fairlearn.metrics import (
    demographic_parity_difference,
    equalized_odds_difference,
)
from fairlearn.reductions import (  # noqa
    DemographicParity,
    EqualizedOdds,
    ExponentiatedGradient,
)

from constants import protected_attributes


# Settings
np.random.seed(0)
import warnings
warnings.filterwarnings("ignore")

In [23]:

ds_train = pd.read_csv('./../data/train.csv')
ds_test = pd.read_csv('./../data/test.csv')

# Let's specify the features and the target
y_train = ds_train["checked"]
X_train = ds_train.drop(['checked'], axis=1)
X_train = X_train.astype(np.float32)

# Let's specify the features and the target
y_test = ds_test["checked"]
X_test = ds_test.drop(['checked'], axis=1)
X_test = X_test.astype(np.float32)

In [24]:

constraint = DemographicParity()

# Define a gradient boosting classifier
classifier = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0)

print(X_train.shape)
print(y_train.shape)
print(len(protected_attributes))

mitigator = ExponentiatedGradient(classifier, constraint)
mitigator.fit(
    X_train, y_train, sensitive_features=X_train[protected_attributes[:2]])

(10116, 315)
(10116,)
45


In [25]:
params = mitigator.get_params()

model = GradientBoostingClassifier(init=params)


In [None]:
empty_df = pd.DataFrame({}, columns=X_train.columns)
for col in df.columns:
    empty_df[col] = empty_df[col].astype(df[col].dtypes.name)

# Result: An empty DataFrame with matching columns and data types
print(empty_df)
empty_df_y = pd.DataFrame({}, columns=y_train.columns)
for col in df.columns:
    empty_df[col] = empty_df_y[col].astype(df[col].dtypes.name)

# Result: An empty DataFrame with matching columns and data types
print(empty_df, empty_df_y)

In [27]:
model.fit(empty_df)

ImportError: cannot import name 'joblib' from 'sklearn.externals' (d:\tudelft\test-val-for-ai-project\.env\Lib\site-packages\sklearn\externals\__init__.py)

In [26]:
y_pred = model.predict(X_test)

NotFittedError: This GradientBoostingClassifier instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.

In [None]:
X_test[protected_variables]

Unnamed: 0,persoon_geslacht_vrouw,belemmering_ind
1688,1.0,0.0
7251,1.0,0.0
5329,1.0,0.0
1697,1.0,1.0
8200,1.0,0.0
...,...,...
5646,1.0,1.0
10391,0.0,1.0
4083,0.0,0.0
4023,0.0,1.0


In [None]:

# Construct a function dictionary
my_metrics = {
    'tpr' : recall_score,
    'fpr' : false_positive_rate,
    'sel' : selection_rate,
    'count' : count
}

# Construct a MetricFrame
mf = MetricFrame(
    metrics=my_metrics,
    y_true=y_test,
    y_pred=y_pred,
    sensitive_features=X_test[protected_variables]
)
mf.overall

tpr         0.467320
fpr         0.005602
sel         0.050285
count    3162.000000
dtype: float64

In [None]:
mf.by_group


Unnamed: 0_level_0,Unnamed: 1_level_0,tpr,fpr,sel,count
persoon_geslacht_vrouw,belemmering_ind,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0.0,0.0,0.423729,0.006525,0.043155,672.0
0.0,1.0,0.504587,0.007928,0.0625,992.0
1.0,0.0,0.3125,0.007421,0.032368,587.0
1.0,1.0,0.533333,0.001218,0.053787,911.0


In [None]:
mf.difference(method='to_overall')

tpr         0.154820
fpr         0.004384
sel         0.017917
count    2575.000000
dtype: float64

In [None]:
print(equalized_odds_ratio(y_test,
                               y_pred,
                               sensitive_features=X_test[protected_variables]))

0.15364538019836438


In [None]:
cr = CorrelationRemover(sensitive_feature_ids=protected_variables)

pipeline = Pipeline(
    steps=[
        ("preprocessor", cr),
        (
            "classifier",
            GradientBoostingClassifier(solver="liblinear", fit_intercept=True),
        ),
        
    ]
)

In [None]:
exponentiated_gradient = ExponentiatedGradient(
    estimator=pipeline,
    constraints=EqualizedOdds(),
    sample_weight_name="classifier__sample_weight",
)
exponentiated_gradient.fit(X_train, y_train, sensitive_features=A_train)
print(exponentiated_gradient.predict(X_test))