In [1]:
# Load all necessary packages
import sys
# Put the parent directory on the module search path (index 1, right after the
# first entry). Presumably this lets the notebook import the in-repo `aif360`
# package instead of an installed copy -- TODO confirm.
sys.path.insert(1, "../")  

import numpy as np
# Seed NumPy's global RNG once, up front. NOTE(review): the shuffled
# `split(..., shuffle=True)` calls in later cells presumably draw from this RNG,
# so results are reproducible only when cells run top to bottom from a fresh kernel.
np.random.seed(0)

# Dataset loaders, the group-fairness metric, and the Reweighing pre-processor.
from aif360.datasets import HeartFailureDataset, DropoutChanceDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.algorithms.preprocessing import Reweighing

# Rich-output helpers used to render markdown headings from code cells.
from IPython.display import Markdown, display

  warn_deprecated('vmap', 'torch.vmap')


In [2]:
# Dropout dataset: measure the mean-outcome gap between age groups on the
# training split, then again after applying Reweighing.

# Rows with Age_at_enrollment < 22 form the privileged class (encoded as 1 in
# the group definitions below); all other rows are the unprivileged class (0).
dataset_orig = DropoutChanceDataset(
    protected_attribute_names=['Age_at_enrollment'],
    privileged_classes=[lambda x: x < 22],
)

# Shuffle, then cut at the 0.7 mark into train / test partitions.
dataset_orig_train, dataset_orig_test = dataset_orig.split([0.7], shuffle=True)

privileged_groups = [{'Age_at_enrollment': 1}]
unprivileged_groups = [{'Age_at_enrollment': 0}]

# --- Before reweighing ---
metric_orig_train = BinaryLabelDatasetMetric(
    dataset_orig_train,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)
mean_diff_orig_train = metric_orig_train.mean_difference()
display(Markdown("#### Original training dataset"))
print("Difference in mean outcomes between unprivileged and privileged groups = %f" % mean_diff_orig_train)

# --- Reweighing: fit on the training split and transform that same split ---
RW = Reweighing(
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)
dataset_transf_train = RW.fit_transform(dataset_orig_train)

# --- After reweighing ---
metric_transf_train = BinaryLabelDatasetMetric(
    dataset_transf_train,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)
mean_diff_transf_train = metric_transf_train.mean_difference()
display(Markdown("#### Transformed training dataset"))
print("Difference in mean outcomes between unprivileged and privileged groups = %f" % mean_diff_transf_train)

This IS THE FILEPATH USED:  aif360/data/raw/dropout/data.csv


#### Original training dataset

Difference in mean outcomes between unprivileged and privileged groups = -0.286817


#### Transformed training dataset

Difference in mean outcomes between unprivileged and privileged groups = 0.000000


In [3]:
# Heart-failure dataset: measure the mean-outcome gap between age groups on the
# training split, then again after applying Reweighing.

# Rows with age > 55 form the privileged class (encoded as 1 in the group
# definitions below); all other rows are the unprivileged class (0).
dataset_orig = HeartFailureDataset(
    protected_attribute_names=['age'],
    privileged_classes=[lambda x: x > 55],
)

# Shuffle, then cut at the 0.7 mark into train / test partitions.
dataset_orig_train, dataset_orig_test = dataset_orig.split([0.7], shuffle=True)

privileged_groups = [{'age': 1}]
unprivileged_groups = [{'age': 0}]

# --- Before reweighing ---
metric_orig_train = BinaryLabelDatasetMetric(
    dataset_orig_train,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)
mean_diff_orig_train = metric_orig_train.mean_difference()
display(Markdown("#### Original training dataset"))
print("Difference in mean outcomes between unprivileged and privileged groups = %f" % mean_diff_orig_train)

# --- Reweighing: fit on the training split and transform that same split ---
RW = Reweighing(
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)
dataset_transf_train = RW.fit_transform(dataset_orig_train)

# --- After reweighing ---
metric_transf_train = BinaryLabelDatasetMetric(
    dataset_transf_train,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)
mean_diff_transf_train = metric_transf_train.mean_difference()
display(Markdown("#### Transformed training dataset"))
print("Difference in mean outcomes between unprivileged and privileged groups = %f" % mean_diff_transf_train)

This IS THE FILEPATH USED:  aif360/data/raw/heart_failure/heart_failure_clinical_records_dataset.csv


#### Original training dataset

Difference in mean outcomes between unprivileged and privileged groups = -0.183784


#### Transformed training dataset

Difference in mean outcomes between unprivileged and privileged groups = -0.000000
