In [13]:
import sys
sys.path.insert(1, "../")

import numpy as np
import pandas as pd

from aif360.datasets import BinaryLabelDataset, GermanDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.algorithms.preprocessing import Reweighing

from IPython.display import Markdown, display

np.random.seed(0)

In [16]:
# Load the admissions table and separate the sensitive attributes from the
# remaining columns.
mimic_data = pd.read_csv('mimic3d.csv')

# `columns=` replaces the positional axis argument (`drop(label, 1)`), which
# pandas 2.0 no longer accepts.
data_full = mimic_data.drop(columns=['ethnicity', 'age', 'gender'])

# Explicit copies: `series[:]` does not copy, so any later in-place edit of
# these Series would otherwise write through to `mimic_data`.
gender = mimic_data['gender'].copy()
ethnicity = mimic_data['ethnicity'].copy()
age = mimic_data['age'].copy()

In [17]:
# Sensitive-attribute frame, index-aligned with the source rows.
# Both columns become 0/1 indicators: ethnicity -> 1 when 'WHITE',
# gender -> 1 when 'F'.
Z = (
    pd.merge(gender, ethnicity, left_index=True, right_index=True)
    .assign(
        ethnicity=lambda frame: frame['ethnicity'].eq('WHITE').astype(int),
        gender=lambda frame: frame['gender'].eq('F').astype(int),
    )
)

In [18]:
# Binary target derived from age: 1 when under 65, 0 when 65 or older.
#
# Computed as a new Series with one vectorised comparison instead of rewriting
# `age` element by element. The in-place loop was not re-runnable: after one
# pass every value is 0 or 1, so a second pass maps all of them to 1.
# NOTE(review): a missing age compares False and therefore lands in the 0
# group here; confirm the column has no NaNs.
y = (age < 65).astype(int)

In [19]:
# Feature frame: everything left in `data_full` except the length-of-stay
# group and three further columns that are dropped rather than encoded.
# `columns=` replaces the positional axis argument, which pandas 2.0 rejects.
X = data_full.drop(
    columns=['LOSgroupNum', 'religion', 'AdmitProcedure', 'AdmitDiagnosis']
)

X.head()

Unnamed: 0,hadm_id,LOSdays,admit_type,admit_location,insurance,marital_status,NumCallouts,NumDiagnosis,NumProcs,NumCPTevents,...,NumLabs,NumMicroLabs,NumNotes,NumOutput,NumRx,NumProcEvents,NumTransfers,NumChartEvents,ExpiredHospital,TotalNumInteract
0,100001,6.17,EMERGENCY,CLINIC REFERRAL/PREMATURE,Private,DIVORCED,0.16,2.59,0.0,1.3,...,43.44,0.65,0.05,5.19,14.91,1.13,0.65,398.7,0,493.89
1,100003,4.04,EMERGENCY,EMERGENCY ROOM ADMIT,Private,SINGLE,0.25,2.23,0.99,1.98,...,55.94,1.24,1.59,5.45,7.18,0.99,1.24,373.02,0,465.71
2,100006,12.04,EMERGENCY,EMERGENCY ROOM ADMIT,Private,SINGLE,0.0,0.75,0.17,0.83,...,33.39,0.33,0.15,4.15,6.23,0.0,0.33,286.21,0,344.0
3,100007,7.29,EMERGENCY,EMERGENCY ROOM ADMIT,Private,MARRIED,0.41,0.69,0.27,0.69,...,32.24,0.69,0.17,9.05,11.52,0.0,0.96,526.06,0,603.05
4,100009,4.88,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,Private,MARRIED,0.0,3.69,0.82,2.25,...,50.61,0.61,0.34,16.19,25.0,2.87,2.05,554.92,0,679.84


In [20]:
# One-hot encode the remaining categorical columns, replacing each source
# column with one indicator column per category value.
categorical_columns = [
    'admit_type',
    'admit_location',
    'insurance',
    'marital_status',
]
for col in categorical_columns:
    # The membership check makes the cell safe to re-run: a column that has
    # already been encoded (and dropped) is simply skipped.
    if col in X.columns:
        # dtype=int keeps the indicators numeric. pandas >= 2.0 defaults to
        # bool, which would turn `X.values` into an object array once mixed
        # with the float columns.
        one_hot_encoded = pd.get_dummies(X[col], dtype=int)
        X = X.drop(columns=col)
        # Suffixes only take effect if a category value collides with an
        # existing column name.
        X = X.join(one_hot_encoded, lsuffix='_left', rsuffix='_right')

X

Unnamed: 0,hadm_id,LOSdays,NumCallouts,NumDiagnosis,NumProcs,NumCPTevents,NumInput,NumLabs,NumMicroLabs,NumNotes,...,Medicare,Private,Self Pay,DIVORCED,LIFE PARTNER,MARRIED,SEPARATED,SINGLE,UNKNOWN (DEFAULT),WIDOWED
0,100001,6.17,0.16,2.59,0.00,1.30,25.12,43.44,0.65,0.05,...,0,1,0,1,0,0,0,0,0,0
1,100003,4.04,0.25,2.23,0.99,1.98,13.61,55.94,1.24,1.59,...,0,1,0,0,0,0,0,1,0,0
2,100006,12.04,0.00,0.75,0.17,0.83,11.46,33.39,0.33,0.15,...,0,1,0,0,0,0,0,1,0,0
3,100007,7.29,0.41,0.69,0.27,0.69,20.30,32.24,0.69,0.17,...,0,1,0,0,0,1,0,0,0,0
4,100009,4.88,0.00,3.69,0.82,2.25,20.49,50.61,0.61,0.34,...,0,1,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58971,199993,24.58,0.00,0.37,0.53,0.94,330.92,66.56,0.20,0.13,...,0,1,0,1,0,0,0,0,0,0
58972,199994,9.75,0.00,0.92,0.41,0.62,95.18,53.03,2.26,0.30,...,1,0,0,0,0,0,0,1,0,0
58973,199995,16.75,0.00,0.60,0.54,0.06,12.66,27.28,0.54,0.10,...,0,1,0,0,0,0,0,1,0,0
58974,199998,5.88,0.00,2.72,0.51,0.17,25.85,49.83,0.17,0.35,...,1,0,0,0,0,1,0,0,0,0


In [25]:
# Rebind X from the encoded DataFrame to its underlying NumPy array; column
# labels are no longer attached after this point. Because the name is reused,
# this cell cannot be run a second time without re-running the cells above.
X = X.values

In [26]:
# Echo the array to confirm the conversion.
X

array([[1.00001e+05, 6.17000e+00, 1.60000e-01, ..., 0.00000e+00,
        0.00000e+00, 0.00000e+00],
       [1.00003e+05, 4.04000e+00, 2.50000e-01, ..., 1.00000e+00,
        0.00000e+00, 0.00000e+00],
       [1.00006e+05, 1.20400e+01, 0.00000e+00, ..., 1.00000e+00,
        0.00000e+00, 0.00000e+00],
       ...,
       [1.99995e+05, 1.67500e+01, 0.00000e+00, ..., 1.00000e+00,
        0.00000e+00, 0.00000e+00],
       [1.99998e+05, 5.88000e+00, 0.00000e+00, ..., 0.00000e+00,
        0.00000e+00, 0.00000e+00],
       [1.99999e+05, 5.50000e+00, 0.00000e+00, ..., 0.00000e+00,
        0.00000e+00, 0.00000e+00]])

In [35]:
dataset_orig_train, dataset_orig_test = np.array_split(X,2)

privileged_groups = [1]
unprivileged_groups = [0]

In [45]:
RW = Reweighing(unprivileged_groups=unprivileged_groups,
                privileged_groups=privileged_groups)
print(RW)
dataset_transf_train = RW.fit_transform(dataset_orig_train.tolist())

<aif360.algorithms.preprocessing.reweighing.Reweighing object at 0x7fdba25a0eb0>


AttributeError: 'list' object has no attribute 'protected_attributes'

In [None]:
# Group-fairness metric on the reweighed training data, comparing the
# unprivileged and privileged groups defined earlier in the notebook.
metric_transf_train = BinaryLabelDatasetMetric(
    dataset_transf_train,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)

display(Markdown("#### Transformed training dataset"))

# Difference in mean outcome between the two groups, as reported by the metric.
mean_outcome_gap = metric_transf_train.mean_difference()
print("Difference in mean outcomes between unprivileged and privileged groups = %f" % mean_outcome_gap)