In [1]:
pip install 'aif360[all]'

Collecting aif360[all]
  Downloading aif360-0.6.1-py3-none-any.whl.metadata (5.0 kB)
Collecting skorch (from aif360[all])
  Downloading skorch-1.0.0-py3-none-any.whl.metadata (11 kB)
Collecting jupyter (from aif360[all])
  Downloading jupyter-1.1.1-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting sphinx-rtd-theme (from aif360[all])
  Downloading sphinx_rtd_theme-3.0.1-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting igraph[plotting] (from aif360[all])
  Downloading igraph-0.11.8-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Collecting lime (from aif360[all])
  Downloading lime-0.2.0.1.tar.gz (275 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m275.7/275.7 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting fairlearn~=0.7 (from aif360[all])
  Downloading fairlearn-0.10.0-py3-none-any.whl.metadata (7.0 kB)
Collecting colorama (from aif360[all])
  Downloading colorama-0.4.6-p

In [2]:
#Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.display import Markdown, display

# Dataset
# from aif360.datasets import GermanDataset

# Fairness metrics
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.metrics import ClassificationMetric

# Scalers
from sklearn.preprocessing import StandardScaler

# Classifiers
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn import preprocessing

  vect_normalized_discounted_cumulative_gain = vmap(
  monte_carlo_vect_ndcg = vmap(vect_normalized_discounted_cumulative_gain, in_dims=(0,))


In [4]:
heart = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Ethics_A2/heart.csv')
heart.head()

Unnamed: 0,Age,Sex,Race,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,Asian,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,Other,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,Other,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,White,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,Other,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [5]:
heart["Race"].value_counts()
# No obvious fairness issue for race

Unnamed: 0_level_0,count
Race,Unnamed: 1_level_1
White,191
Other,186
Black,183
Hispanic,181
Asian,177


In [6]:
from aif360.datasets import StandardDataset
# 1. Identify categorical features
categorical_features = heart.select_dtypes(include=['object']).columns

# 2. Create a LabelEncoder object
le = preprocessing.LabelEncoder()

# 3. Encode the categorical features
for feature in categorical_features:
    heart[feature] = le.fit_transform(heart[feature])

In [7]:
dataset_orig = StandardDataset(
    df=heart,
    label_name='HeartDisease',  # Assuming 'HeartDisease' is the target variable column
    favorable_classes=[1],
    protected_attribute_names=['Age'],
    privileged_classes=[lambda x: x >= 47 and x <=62],
    features_to_drop=['Sex']
)
dataset_orig_train, dataset_orig_test = dataset_orig.split([0.7], shuffle=True)

privileged_groups = [{'Age': 1}]
unprivileged_groups = [{'Age': 0}]

In [8]:
model = make_pipeline(StandardScaler(),
                      LogisticRegression(solver='liblinear', random_state=1))
fit_params = {'logisticregression__sample_weight': dataset_orig_train.instance_weights}

lr_orig= model.fit(dataset_orig_train.features, dataset_orig_train.labels.ravel(), **fit_params)

In [9]:
y_test=model.predict(dataset_orig_test.features)
dataset_pred= dataset_orig_test.copy()
dataset_pred.labels=y_test

metric = ClassificationMetric(
                dataset_orig_test, dataset_pred,
                unprivileged_groups=unprivileged_groups,
                privileged_groups=privileged_groups)

display(Markdown("#### Classifier mean difference"))
print("Difference in mean between unprivileged and privileged groups  = %f" % metric.mean_difference())
display(Markdown("#### Classifier TPR difference"))
print("Difference in TPR between unprivileged and privileged groups  = %f" % metric.true_positive_rate_difference())
display(Markdown("#### Classifier average_abs_odds_difference difference"))
print("Difference in average of absolute difference in FPR and TPR between unprivileged and privileged groups  = %f" % metric.average_abs_odds_difference())
"""
display(Markdown("#### Classifier accuracy difference"))
print("Difference in acc between unprivileged and privileged groups  = %f" % abs(metric.accuracy(1)-metric.accuracy(0)))
display(Markdown("#### Classifier precision difference"))
print("Difference in ppv between unprivileged and privileged groups  = %f" % abs(metric.precision(1)-metric.precision(0)))
"""

#### Classifier mean difference

Difference in mean between unprivileged and privileged groups  = -0.214445


#### Classifier TPR difference

Difference in TPR between unprivileged and privileged groups  = -0.153846


#### Classifier average_abs_odds_difference difference

Difference in average of absolute difference in FPR and TPR between unprivileged and privileged groups  = 0.141403


'\ndisplay(Markdown("#### Classifier accuracy difference"))\nprint("Difference in acc between unprivileged and privileged groups  = %f" % abs(metric.accuracy(1)-metric.accuracy(0)))\ndisplay(Markdown("#### Classifier precision difference"))\nprint("Difference in ppv between unprivileged and privileged groups  = %f" % abs(metric.precision(1)-metric.precision(0)))\n'

In [10]:
from aif360.algorithms.preprocessing.reweighing import Reweighing
# Metric for the original dataset
metric_orig_train = BinaryLabelDatasetMetric(dataset_orig_train, unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)
display(Markdown("#### Original training dataset"))
print("Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_orig_train.mean_difference())

#### Original training dataset

Difference in mean outcomes between unprivileged and privileged groups = -0.105907


In [11]:
RW = Reweighing(unprivileged_groups=unprivileged_groups,
               privileged_groups=privileged_groups)
RW.fit(dataset_orig_train)
dataset_transf_train = RW.transform(dataset_orig_train)

In [12]:
### Testing
assert np.abs(dataset_transf_train.instance_weights.sum()-dataset_orig_train.instance_weights.sum())<1e-6

In [13]:
metric_transf_train = BinaryLabelDatasetMetric(dataset_transf_train,
                                         unprivileged_groups=unprivileged_groups,
                                         privileged_groups=privileged_groups)
display(Markdown("#### Transformed training dataset"))
print("Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_transf_train.mean_difference())

#### Transformed training dataset

Difference in mean outcomes between unprivileged and privileged groups = 0.000000


In [14]:
### Testing
assert np.abs(metric_transf_train.mean_difference()) < 1e-6

In [23]:
scale_transf = StandardScaler()
X_train = scale_transf.fit_transform(dataset_transf_train.features)
y_train = dataset_transf_train.labels.ravel()

lmod = LogisticRegression()
lmod.fit(X_train, y_train,
        sample_weight=dataset_transf_train.instance_weights)
y_train_pred = lmod.predict(X_train)
# positive class index
pos_ind = np.where(lmod.classes_ == dataset_orig_train.favorable_label)[0][0]

In [24]:
dataset_transf_test_pred = dataset_orig_test.copy(deepcopy=True)
X_test = scale_transf.fit_transform(dataset_transf_test_pred.features)
y_test = dataset_transf_test_pred.labels
dataset_transf_test_pred.scores = lmod.predict_proba(X_test)[:,pos_ind].reshape(-1,1)

metric_trans = ClassificationMetric(
    dataset_transf_test_pred, dataset_orig_test,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups)

display(Markdown("#### Classifier mean difference after reweighing"))
print("Difference in mean between unprivileged and privileged groups  = %f" % metric_trans.mean_difference())
display(Markdown("#### Classifier TPR difference after reweighing"))
print("Difference in TPR between unprivileged and privileged groups  = %f" % metric_trans.true_positive_rate_difference())
display(Markdown("#### Classifier average_abs_odds_difference after reweighing"))
print("Difference in average of absolute difference in FPR and TPR between unprivileged and privileged groups  = %f" % metric_trans.average_abs_odds_difference())

#### Classifier mean difference after reweighing

Difference in mean between unprivileged and privileged groups  = -0.104651


#### Classifier TPR difference after reweighing

Difference in TPR between unprivileged and privileged groups  = 0.000000


#### Classifier average_abs_odds_difference after reweighing

Difference in average of absolute difference in FPR and TPR between unprivileged and privileged groups  = 0.000000
