# Part 4: Improving fairness

## Loading the dataset and models

In [1]:
import joblib
import pandas as pd
from fairlearn.postprocessing import ThresholdOptimizer
from fairlearn.widget import FairlearnDashboard

In [2]:
# Restore the model persisted at ../models/model.bin.
model = joblib.load(filename='../models/model.bin')

In [3]:
# Restore the model persisted at ../models/model_1.bin.
# NOTE(review): presumably a variant produced in an earlier part — confirm.
model_1 = joblib.load(filename='../models/model_1.bin')

In [4]:
# Restore the model persisted at ../models/model_2.bin.
# NOTE(review): presumably a variant produced in an earlier part — confirm.
model_2 = joblib.load(filename='../models/model_2.bin')

In [5]:
# Restore the model persisted at ../models/model_3.bin.
# NOTE(review): presumably a variant produced in an earlier part — confirm.
model_3 = joblib.load(filename='../models/model_3.bin')

In [6]:
# Read the processed train and test splits in one pass; order of the paths
# matches the order of the names on the left-hand side.
df_train, df_test = map(
    pd.read_csv,
    ('../data/processed/train.csv', '../data/processed/test.csv'),
)

In [7]:
# Target first, then the feature matrix: every column except the sensitive
# attribute ('SEX') and the target itself.
y_train = df_train['default.payment.next.month']
x_train = df_train.drop(columns=['SEX', 'default.payment.next.month'])

In [8]:
# Same split as for the training data: the target on its own, and the features
# without the sensitive attribute ('SEX') or the target.
y_test = df_test['default.payment.next.month']
x_test = df_test.drop(columns=['SEX', 'default.payment.next.month'])

## Balancing the dataset

In [9]:
# Build a class-balanced training index: keep every label-1 row and draw the
# same number of label-0 rows (without replacement).
# random_state pins the draw; without it every kernel restart would select
# different rows, so the fitted optimizers and all dashboards below would change
# between runs.
balanced_idx = y_train[y_train == 1].index
balanced_idx = balanced_idx.union(
    y_train[y_train == 0].sample(n=balanced_idx.size, random_state=42).index
)

In [10]:
# Restrict the training data to the balanced index, then split it into the
# target and the features (sensitive attribute and target removed).
y_train_balanced = df_train.loc[balanced_idx, 'default.payment.next.month']
x_train_balanced = df_train.loc[balanced_idx].drop(
    columns=['SEX', 'default.payment.next.month']
)

Now that the dataset is prepared, we can start optimizing the models using the `ThresholdOptimizer`.

## Fairness Algorithm

In [11]:
# Wrap each loaded model in a ThresholdOptimizer constrained to demographic
# parity; the order of the models matches the order of the names on the left.
# NOTE(review): depending on the installed fairlearn version, fitting the
# optimizer may refit the wrapped estimator rather than reuse it as loaded —
# confirm which behavior is intended.
optimizer, optimizer_1, optimizer_2, optimizer_3 = (
    ThresholdOptimizer(estimator=base_model, constraints='demographic_parity')
    for base_model in (model, model_1, model_2, model_3)
)

In [12]:
# All four optimizers are fitted on the same balanced data with the same
# sensitive attribute, so the 'SEX' column is looked up once and reused.
sex_train_balanced = df_train.loc[balanced_idx, 'SEX']

optimizer.fit(x_train_balanced, y_train_balanced, sensitive_features=sex_train_balanced)
optimizer_1.fit(x_train_balanced, y_train_balanced, sensitive_features=sex_train_balanced)
optimizer_2.fit(x_train_balanced, y_train_balanced, sensitive_features=sex_train_balanced)
optimizer_3.fit(x_train_balanced, y_train_balanced, sensitive_features=sex_train_balanced)

## Measuring fairness of the new model


In [13]:
# Fairness dashboard for the threshold-optimized version of `model` on the
# test set, grouped by the 'SEX' column.
# ThresholdOptimizer.predict is randomized; a fixed, non-zero random_state makes
# the predictions (and therefore this dashboard) reproducible across runs.
FairlearnDashboard(
    sensitive_features=df_test['SEX'],
    sensitive_feature_names=['SEX'],
    y_true=df_test['default.payment.next.month'],
    y_pred=optimizer.predict(x_test, sensitive_features=df_test['SEX'], random_state=42)
)

FairlearnWidget(value={'true_y': [0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…

<fairlearn.widget._fairlearn_dashboard.FairlearnDashboard at 0x7f8f1d258100>

In [14]:
# Fairness dashboard for the threshold-optimized version of `model_1` on the
# test set, grouped by the 'SEX' column.
# ThresholdOptimizer.predict is randomized; a fixed, non-zero random_state makes
# the predictions (and therefore this dashboard) reproducible across runs.
FairlearnDashboard(
    sensitive_features=df_test['SEX'],
    sensitive_feature_names=['SEX'],
    y_true=df_test['default.payment.next.month'],
    y_pred=optimizer_1.predict(x_test, sensitive_features=df_test['SEX'], random_state=42)
)

FairlearnWidget(value={'true_y': [0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…

<fairlearn.widget._fairlearn_dashboard.FairlearnDashboard at 0x7f8f1d23fa60>

In [15]:
# Fairness dashboard for the threshold-optimized version of `model_2` on the
# test set, grouped by the 'SEX' column.
# ThresholdOptimizer.predict is randomized; a fixed, non-zero random_state makes
# the predictions (and therefore this dashboard) reproducible across runs.
FairlearnDashboard(
    sensitive_features=df_test['SEX'],
    sensitive_feature_names=['SEX'],
    y_true=df_test['default.payment.next.month'],
    y_pred=optimizer_2.predict(x_test, sensitive_features=df_test['SEX'], random_state=42)
)

FairlearnWidget(value={'true_y': [0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…

<fairlearn.widget._fairlearn_dashboard.FairlearnDashboard at 0x7f8f1b4f5910>

In [16]:
# Fairness dashboard for the threshold-optimized version of `model_3` on the
# test set, grouped by the 'SEX' column.
# ThresholdOptimizer.predict is randomized; a fixed, non-zero random_state makes
# the predictions (and therefore this dashboard) reproducible across runs.
FairlearnDashboard(
    sensitive_features=df_test['SEX'],
    sensitive_feature_names=['SEX'],
    y_true=df_test['default.payment.next.month'],
    y_pred=optimizer_3.predict(x_test, sensitive_features=df_test['SEX'], random_state=42)
)

FairlearnWidget(value={'true_y': [0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…

<fairlearn.widget._fairlearn_dashboard.FairlearnDashboard at 0x7f8ef88b7400>

## Comparing the models

In [17]:
# Test-set predictions of `model` with and without threshold post-processing,
# keyed by the label shown in the comparison dashboard.
# ThresholdOptimizer.predict is randomized; a fixed, non-zero random_state keeps
# the compared predictions identical from run to run.
# NOTE(review): if optimizer.fit refits the wrapped estimator in place (this
# depends on the fairlearn version), `model` here is no longer the model as
# loaded from disk — confirm.
comparison = {
    'Original model': model.predict(x_test),
    'TresholdOptimizer': optimizer.predict(x_test, sensitive_features=df_test['SEX'], random_state=42)
}

In [18]:
# Test-set predictions of `model_1` with and without threshold post-processing,
# keyed by the label shown in the comparison dashboard.
# ThresholdOptimizer.predict is randomized; a fixed, non-zero random_state keeps
# the compared predictions identical from run to run.
# NOTE(review): if optimizer_1.fit refits the wrapped estimator in place (this
# depends on the fairlearn version), `model_1` here is no longer the model as
# loaded from disk — confirm.
comparison_1 = {
    'Original model': model_1.predict(x_test),
    'TresholdOptimizer': optimizer_1.predict(x_test, sensitive_features=df_test['SEX'], random_state=42)
}

In [19]:
# Test-set predictions of `model_2` with and without threshold post-processing,
# keyed by the label shown in the comparison dashboard.
# ThresholdOptimizer.predict is randomized; a fixed, non-zero random_state keeps
# the compared predictions identical from run to run.
# NOTE(review): if optimizer_2.fit refits the wrapped estimator in place (this
# depends on the fairlearn version), `model_2` here is no longer the model as
# loaded from disk — confirm.
comparison_2 = {
    'Original model': model_2.predict(x_test),
    'TresholdOptimizer': optimizer_2.predict(x_test, sensitive_features=df_test['SEX'], random_state=42)
}

In [20]:
# Test-set predictions of `model_3` with and without threshold post-processing,
# keyed by the label shown in the comparison dashboard.
# ThresholdOptimizer.predict is randomized; a fixed, non-zero random_state keeps
# the compared predictions identical from run to run.
# NOTE(review): if optimizer_3.fit refits the wrapped estimator in place (this
# depends on the fairlearn version), `model_3` here is no longer the model as
# loaded from disk — confirm.
comparison_3 = {
    'Original model': model_3.predict(x_test),
    'TresholdOptimizer': optimizer_3.predict(x_test, sensitive_features=df_test['SEX'], random_state=42)
}

## Showing the comparisons

In [21]:
# Side-by-side dashboard for the two prediction sets stored in `comparison`,
# grouped by the 'SEX' column (displayed under the name 'Gender').
# y_test holds the same target column of df_test that was extracted earlier.
FairlearnDashboard(
    y_true=y_test,
    y_pred=comparison,
    sensitive_features=df_test['SEX'],
    sensitive_feature_names=['Gender'],
)

FairlearnWidget(value={'true_y': [0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…

<fairlearn.widget._fairlearn_dashboard.FairlearnDashboard at 0x7f8f1d26ba90>

In [22]:
# Side-by-side dashboard for the two prediction sets stored in `comparison_1`,
# grouped by the 'SEX' column (displayed under the name 'Gender').
# y_test holds the same target column of df_test that was extracted earlier.
FairlearnDashboard(
    y_true=y_test,
    y_pred=comparison_1,
    sensitive_features=df_test['SEX'],
    sensitive_feature_names=['Gender'],
)

FairlearnWidget(value={'true_y': [0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…

<fairlearn.widget._fairlearn_dashboard.FairlearnDashboard at 0x7f8f1b50f070>

In [23]:
# Side-by-side dashboard for the two prediction sets stored in `comparison_2`,
# grouped by the 'SEX' column (displayed under the name 'Gender').
# y_test holds the same target column of df_test that was extracted earlier.
FairlearnDashboard(
    y_true=y_test,
    y_pred=comparison_2,
    sensitive_features=df_test['SEX'],
    sensitive_feature_names=['Gender'],
)

FairlearnWidget(value={'true_y': [0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…

<fairlearn.widget._fairlearn_dashboard.FairlearnDashboard at 0x7f8f1d239220>

In [24]:
# Side-by-side dashboard for the two prediction sets stored in `comparison_3`,
# grouped by the 'SEX' column (displayed under the name 'Gender').
# y_test holds the same target column of df_test that was extracted earlier.
FairlearnDashboard(
    y_true=y_test,
    y_pred=comparison_3,
    sensitive_features=df_test['SEX'],
    sensitive_feature_names=['Gender'],
)

FairlearnWidget(value={'true_y': [0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…

<fairlearn.widget._fairlearn_dashboard.FairlearnDashboard at 0x7f8f1b50fcd0>