<a href="https://colab.research.google.com/github/JoshuaOgbe/JoshuaOgbe/blob/main/HCUP_Substance_Abuse_Predict_Admitted_Patients_Length_of_Stay.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Steps: 
# 1. Install libraries
# 2. Import all libraries
# 3. Set parameters for FS, MT, Preprocess, fairness metrics etc. 
# 4. Preprocessing
# 5. Feature selection
# 6. Model training
# 7. Model evaluation
# 8. Unfairness metrics calculation of raw model. 
# 9. Mitigating the unfairness
# 10. Unfairness metrics for mitigated model. 

In [None]:
# Author: Kshitij Singh
# Subject: Code for LoS prediction for substance abuse patients. Evaluating fairness metrics and mitigating unfairness in models. 

In [None]:
# 1. Make all datasets consistent. 
# 2. Run the mitigation code. Save results for all. 
# 3. Do it for Xinyu's dataset and save the result. 
# 4. Send the code and result by email. 

In [None]:
!pip3 install fairlearn
!pip3 install pytorch_tabnet

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# Author: Kshitij Singh


# Imports
import pandas as pd
from sklearn import preprocessing
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import SelectKBest,chi2,RFE
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.ensemble import RandomForestClassifier

from sklearn.ensemble._bagging import BaggingClassifier
from sklearn.ensemble._forest import ExtraTreesClassifier

from sklearn.tree import DecisionTreeClassifier 
from sklearn.ensemble._bagging import BaggingClassifier
from sklearn.metrics import confusion_matrix, fbeta_score

from fairlearn.metrics import MetricFrame
from fairlearn.metrics import selection_rate, count
from fairlearn.postprocessing import ThresholdOptimizer
from fairlearn.reductions import ExponentiatedGradient, ClassificationMoment, Moment, DemographicParity, GridSearch
from fairlearn.metrics import equalized_odds_difference, equalized_odds_ratio, false_negative_rate_difference, false_positive_rate_difference, true_positive_rate_difference, true_negative_rate_difference, demographic_parity_ratio, demographic_parity_difference

# Suppress warnings
# `warnings.warn` is replaced by the no-op below, so every later call made
# through `warnings.warn` in this kernel is silently discarded.
def warn(*args, **kwargs):
    """Accept any arguments and do nothing; installed as `warnings.warn`."""
    pass
import warnings
warnings.warn = warn

In [None]:
# Setting parameters
# NOTE(review): `alpha`, `k`, `unfairness_metrics` and `model_evaulation` are not
# referenced by any live code visible in this part of the notebook — confirm
# whether later cells use them.

alpha = 0.1 # Statistical significance for chi2 test. 
k = 3 # Minimum number of feature selection method selecting a given feature. 

# Estimator classes (not instances); this list is the default `models` argument
# of train_models_and_evaluate, which instantiates each one with no arguments.
models_to_train = [
    # LogisticRegression, 
    RandomForestClassifier, 
    # DecisionTreeClassifier, 
    XGBClassifier, 
    # BaggingClassifier
]


# Fairness-related metric callables.
unfairness_metrics = [
    selection_rate, 
    fbeta_score, 
    equalized_odds_difference, 
    equalized_odds_ratio
]
# Predictive-performance metric callables.
model_evaulation = [
    accuracy_score, 
    recall_score, 
    precision_score, 
    f1_score 
]

# Sensitive attributes; unfairness_metrics_evaluation loops over these column
# names and reads each one from X_test, so they must be present in the features.
demographic_features = [
    'RACE', 
    'FEMALE', 
    'PAY1', 
    'ZIPINC_QRTL', 
    'AGE'
]

# Choose the appropriate variable for selected features depending on the dataset being used. 
# Keep all categorical features numeric. 
# Every name appears exactly once: selecting a DataFrame with a repeated column
# name yields duplicate columns ('AGE' used to be listed twice here).
selected_features_florida = [
 'DRG',
 'DHOUR',
 'PAY1_X',
 'MDC',
 'I10_NDX',
 'EDHOUR',
 'AHOUR',
 'AGE',
 'PSTCO2',
 'I10_NPR',
 'DaysToEvent',
 'ATYPE',
 'YEAR',
 'TRAN_OUT',
 'PointOfOriginUB04',
 'PSTCO',
 'POA_Hosp_Edit1',
 'P7EDSRC_X',
 'HCUP_ED',
 'EDYEAR',
 'DXPOA2',
 'DISPUB04',
 'AWEEKEND', 
 'RACE', 
 'FEMALE', 
 'ZIPINC_QRTL']

# Candidate feature columns for the Maryland dataset.
# NOTE(review): the dataset-inconsistency notes in this notebook describe
# LOS_status as Maryland's (already encoded) target column. If that is right,
# keeping it in a feature list leaks the label into X — confirm before use.
selected_features_maryland = ['AGE',
'ATYPE',
'AWEEKEND',
'BWT',
'DRG',
'DISPUNIFORM',
'DISP_X',
'DSHOSPID',
'DaysBurnUnit',
'DaysCCU',
'DaysICU',
'DaysNICU',
'DaysPICU',
'DaysShockUnit',
'DaysToEvent',
'FEMALE',
'HISPANIC',
'Homeless',
'HospitalUnit',
'LOS_status',  # NOTE(review): presumably the target column — see note above
'MARITALSTATUSUB04',
'MEDINCSTQ',
'PSTATE',
'PSTCO',
'PointOfOriginUB04',
'RACE',
'READMIT',
'P7EDSRC_X',
'PAY1',
'PAY2',
'PAY3',
'TRAN_IN',
'TRAN_OUT',
'VisitLink',
'ZIPINC_QRTL',
'ZIP',
'PrimLang',
'AYEAR',
    ]

# Feature columns for the Arizona dataset. This is the list the rest of the
# notebook actually uses: preprocessing, model training and the fairness
# evaluation all index their inputs with `selected_features_arizona`.
selected_features_arizona = ['AGE',
            'ATYPE', 'AWEEKEND', 'DISPUNIFORM',
            'HCUP_ED', 'I10_NDX','I10_NPR','MDC',
            'MDC_NoPOA','PRDAY2','PRMONTH1','TRAN_OUT',
            'ATOD','DTOD', 'TRAN_IN', 'AHOUR', 'AMONTH',
            'DHOUR', 'DMONTH', 'DXPOA1', 'FEMALE',
            'MEDINCSTQ', 'HISPANIC', 'Homeless',
            'MARITALSTATUSUB04','PAY1','RACE','ZIPINC_QRTL']

# Feature columns for the TEDS dataset. 'PAY1_X', 'ZIPINC_QRTL' and 'FEMALE'
# are the names produced by the (currently commented-out) TEDS rename of
# PRIMPAY, PRIMINC and GENDER in the data-loading cell.
selected_features_teds = ['AGE', 'ALCDRUG', 'ALCFLG', 'ARRESTS', 'DAYWAIT',
                     'DETCRIM', 'DISYR', 'DIVISION', 'DSMCRIT', 'EDUC', 
                     'EMPLOY', 'EMPLOY_D', 'ETHNIC', 'FREQ1', 'FREQ1_D', 
                     'FREQ2', 'FREQ3_D', 'FREQ_ATND_SELF_HELP', 'FREQ_ATND_SELF_HELP_D', 
                     'FRSTUSE1', 'FRSTUSE2', 'FEMALE', 'HERFLG', 'HLTHINS', 
                     'MARFLG', 'MARSTAT', 'METHUSE', 'NOPRIOR', 'OPSYNFLG', 
                     'PREG', 'ZIPINC_QRTL', 'PSOURCE', 'PSYPROB',
                     'RACE', 'REASON', 'REGION', 'SERVICES', 'SERVICES_D', 
                     'STFIPS', 'SUB1_D', 'SUB2_D', 'SUB3', 'SUB3_D', 'PAY1_X'] 

# Input CSV on the mounted Google Drive; read later with pd.read_csv(file_path).
file_path = '/content/drive/MyDrive/substance_only.csv'
# NOTE(review): output_path is not used by the code visible here — the CSV
# results are written with bare relative file names. Confirm the intent.
output_path = '/content/'


In [None]:
# Mount Google Drive at /content/drive; `file_path` points inside this mount,
# so this cell has to run before the dataset is read.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Inconsistency in datasets: 
# 1. In Maryland dataset target column is called LoS_status. 
# 1. In Maryland LOS_status is already encoded. 
# 2. In Arizona the target column is called LOS_X
# 2. In Arizona it is AGE and not AGE_group
# 2. In Arizona encoding is 'Long Stay' or 'Short Stay' whereas all others have 0 and 1. 
# 2. In Arizona a lot of columns use strings for classes whereas other datasets use label encoding or numerical values for encoding. For tree-based algorithms, one-hot encoding reduces their accuracy. 
# 3. Demographic feature names are not consistent across datasets. Following is the name of the features: 
# Insurance - Pay1_X
# Age - AGE
# Race - RACE
# Gender - FEMALE
# Income - ZIPINC_QRTL
# 4. In TEDS there is no ZIPINC_QRTL, No insurance information. 
# 5. Handling of blanks: in some datasets blanks are replaced with 99999 and in others with 100. 

In [None]:
from pandas.core.groupby.generic import DataFrame
from pandas.io.formats.format import DataFrameRenderer
# Preprocessing
# To do: 
# 1. Test reading data for Maryland and other dataset. 
# 2. Convert age into multiple categories 0 - 15, 15 - 25, 25 - 35. 

def read_data(file_path):
    """
    Load the CSV found at `file_path` (anything pandas.read_csv accepts)
    and hand the parsed table back as a pandas DataFrame.
    """
    frame = pd.read_csv(file_path)
    return frame

def clean_and_preprocess(df, columns_to_select, select_all=False,
                         target_column='LOS_X', los_threshold=None):
    """
    Clean the dataset, derive the target column and build a train/test split.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data. It is not modified; the cleaned copy is returned as the
        last element of the result.
    columns_to_select : list of str
        Feature columns that make up X. Repeated names are dropped (first
        occurrence wins) so X never contains duplicate columns.
    select_all : bool, default False
        When True, `columns_to_select` is ignored and every column except
        `target_column` and the derived 'LOS_category' is used as a feature.
    target_column : str, default 'LOS_X'
        Column holding the length-of-stay information.
    los_threshold : number or None, default None
        None copies `target_column` into 'LOS_category' unchanged, which is
        what the notebook relies on because LOS_X is label-encoded beforehand.
        A number binarises instead: 0 when the value is <= threshold, else 1.

    Returns
    -------
    X_train, X_test, y_train, y_test, X, y, df
        The 70/30 split (random_state=42), the full feature frame and target
        series it was drawn from, and the cleaned dataframe (with
        'LOS_category' added and AGE binarised).

    Notes
    -----
    The dataframe must contain 'ZIPINC_QRTL' and 'AGE'; both are used for row
    filtering regardless of the selected features.
    """
    blank_sentinel = 999999

    # dropna()/replace() return new frames, so the caller's object is untouched.
    df = df.dropna()
    # Cells holding a single space are treated as blanks and mapped to a sentinel.
    df = df.replace(' ', blank_sentinel)

    if los_threshold is None:
        df['LOS_category'] = df[target_column]
    else:
        df['LOS_category'] = np.where(df[target_column] <= los_threshold, 0, 1)

    # Drop rows whose income quartile is the blank sentinel. The sentinel is
    # written as an int above, but string-typed columns may carry '999999',
    # so both spellings are checked.
    zipinc = df['ZIPINC_QRTL']
    is_blank_income = (zipinc == blank_sentinel) | (zipinc == str(blank_sentinel))
    df = df[~is_blank_income]

    # Drop AGE == 0 rows, then binarise AGE: 0 for under 25, 1 otherwise.
    # .copy() makes the filtered frame independent before assigning into it.
    df = df[df['AGE'] != 0].copy()
    df['AGE'] = np.where(df['AGE'] < 25, 0, 1)

    if select_all:
        feature_columns = [c for c in df.columns
                           if c not in (target_column, 'LOS_category')]
    else:
        # dict.fromkeys keeps order while removing repeated names.
        feature_columns = list(dict.fromkeys(columns_to_select))

    X = df[feature_columns]
    y = df['LOS_category']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    return X_train, X_test, y_train, y_test, X, y, df
    


In [None]:
# Load the raw CSV through the notebook's own reader helper.
df = read_data(file_path)

# Doing this for TEDS
# df.rename(columns={"PRIMPAY": 'PAY1_X', 'PRIMINC': 'ZIPINC_QRTL', 'GENDER': 'FEMALE'}, inplace=True)
# X_train, X_test, y_train, y_test, X, y, df = clean_and_preprocess(df, selected_features_arizona)

In [None]:
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

# Columns to treat as categorical. LOS_X is in the list, so the length-of-stay
# column is integer-coded here together with the features.
categorical_columns = [
    'AHOUR', 'ATYPE', 'AMONTH', 'AWEEKEND',
    'DHOUR', 'DIED', 'DISPUNIFORM', 'DMONTH',
    'DXPOA1', 'DXPOA2', 'FEMALE', 'HCUP_ED',
    'HISPANIC', 'Homeless', 'I10_BIRTH', 'I10_DELIVERY',
    'I10_NDX', 'I10_NPR', 'I10_PROCTYPE',
    'LOS_X',
    'MARITALSTATUSUB04',
    'MDC', 'MDC_NoPOA', 'MEDINCSTQ', 'PAY1', 'PL_CBSA',
    'PL_NCHS', 'PL_RUCC', 'PL_UIC', 'PL_UR_CAT4',
    'PRDAY1', 'PRDAY2', 'PointOfOriginUB04', 'SPLIT_IPED',
    'PRMONTH1', 'ATOD', 'DTOD', 'MDBOARD1', 'MDBOARD2',
    'RACE', 'TRAN_IN', 'TRAN_OUT', 'ZIPINC_QRTL',
    'PRMONTH2',
]
df = df.astype(dict.fromkeys(categorical_columns, 'category'))

# Every column that now has the category dtype gets encoded.
Cat = df.select_dtypes(include = "category").columns

# One encoder object is refitted per column; values are cast to str first, so
# the integer codes are assigned over the string form of each value.
le = LabelEncoder()
for feat in Cat:
    as_text = df[feat].astype(str)
    df[feat] = le.fit_transform(as_text)

In [None]:
# Clean the label-encoded frame and build the 70/30 split. `df` is rebound to
# the cleaned frame returned by the function (it gains 'LOS_category').
X_train, X_test, y_train, y_test, X, y, df = clean_and_preprocess(df, selected_features_arizona)

In [None]:
# # DO NOT RUN THIS. FEATURES ARE ALREADY SELECTED. 
# # Feature selection

# def select_features(X, y, k=3):
#     """
#     Returns a list of features selected by atleast k feature selection algorithms. 
#     """
#     # Chi2 selection
#     selector = SelectKBest(chi2, k=46)
#     selector.fit(X, y)
#     cols = selector.get_support(indices=True)
#     features_df_new = X.iloc[:,cols]

#     alpha = 0.05
#     chi_support = [j for i, j in zip(selector.pvalues_, cols) if i < alpha]

#     # RFE


#     # Pearson correlation
#     def cor_selector(X, y,num_feats):
#         cor_list = []
#         feature_name = X.columns.tolist()
#         # calculate the correlation with y for each feature
#         for i in X.columns.tolist():
#             cor = np.corrcoef(X[i], y)[0, 1]
#             cor_list.append(cor)
#         # replace NaN with 0
#         cor_list = [0 if np.isnan(i) else i for i in cor_list]
#         # feature name
#         cor_feature = X.iloc[:,np.argsort(np.abs(cor_list))[-num_feats:]].columns.tolist()
#         # feature selection? 0 for not select, 1 for select
#         cor_support = [True if i in cor_feature else False for i in feature_name]
#         return cor_support, cor_feature, cor_list
#     cor_support, cor_feature, cor_list = cor_selector(X, y, 20)
#     print(str(len(cor_feature)), 'selected features')


#     # RFE
#     # Selecting optimal number of features
#     feature_count = []
#     for i in range(2, len(X_train.columns)):
#         rfe_selector = RFE(estimator=LogisticRegression(), n_features_to_select=i, step=10, verbose=5)
#         rfe_selector.fit(X, y)
#         accuracy_s = accuracy_score(rfe_selector.predict(X_test), y_test)
#         rfe_support = rfe_selector.get_support()
#         rfe_feature = X.loc[:,rfe_support].columns.tolist()
#         feature_count.append((i, accuracy_s, rfe_feature))
#         print(str(len(rfe_feature)), 'selected features')
    
#     # Logistic Regression with L1 regularization
#     embeded_lr_selector = SelectFromModel(LogisticRegression(penalty="l1", solver="liblinear"), max_features=46)
#     embeded_lr_selector.fit(X, y)

#     embeded_lr_support = embeded_lr_selector.get_support()
#     embeded_lr_feature = X.loc[:,embeded_lr_support].columns.tolist()
#     print(str(len(embeded_lr_feature)), 'selected features')

#     # Feature selection using LightGBM model
#     lgbc=LGBMClassifier(n_estimators=500, learning_rate=0.05, num_leaves=32, colsample_bytree=0.2,
#                 reg_alpha=3, reg_lambda=1, min_split_gain=0.01, min_child_weight=40)

#     embeded_lgb_selector = SelectFromModel(lgbc, max_features=46)
#     embeded_lgb_selector.fit(X, y)

#     embeded_lgb_support = embeded_lgb_selector.get_support()
#     embeded_lgb_feature = X.loc[:,embeded_lgb_support].columns.tolist()
#     print(str(len(embeded_lgb_feature)), 'selected features')

#     # put all selection together
#     feature_selection_df = pd.DataFrame({'Feature':X.columns, 'Pearson':cor_support,
#                                          'Chi-2':chi_support, 'RFE':rfe_support,
#                                          'Logistics':embeded_lr_support,
#                                         'Random Forest':embeded_rf_support,
#                                          'LightGBM':embeded_lgb_support})
#     # count the selected times for each feature
#     feature_selection_df['Total'] = np.sum(feature_selection_df, axis=1)
#     # display the top 100
#     feature_selection_df = feature_selection_df.sort_values(['Total','Feature'] , ascending=False)
#     feature_selection_df.index = range(1, len(feature_selection_df)+1)
#     feature_selection_df.head(num_feats)

#     # Save the results
#     feature_selection_df.to_csv("FeatureSelected.csv", index=False)

#     return

In [None]:
# Model training
from sklearn.metrics import classification_report
def train_models_and_evaluate(X_train, y_train, X_test, y_test,
                              selected_features, 
                              models=models_to_train,
                              ):
    """
    Fit each model class on the selected features and score it on the test set.

    Parameters
    ----------
    X_train, X_test : pandas.DataFrame
        Feature frames; only the `selected_features` columns are used.
    y_train, y_test : array-like
        Targets. The report labels and the default binary precision/recall/F1
        assume the two classes 0 and 1.
    selected_features : list of str
        Columns to train and predict on.
    models : iterable of estimator classes, default `models_to_train`
        Each entry is instantiated with no arguments.

    Returns
    -------
    list
        The fitted model instances, in the order of `models`.

    Side effects
    ------------
    Prints a classification report per model and writes "models.csv" (one row
    per model: model, accuracy, precision, recall, f1) to the working directory.
    """
    metric_columns = ['model', 'accuracy', 'precision', 'recall', 'f1']
    feature_columns = list(selected_features)
    X_train = X_train[feature_columns]
    X_test = X_test[feature_columns]

    metric_rows = []
    trained_models = []
    for m in models:
        model = m()
        print(f"Fitting {model}...")
        model.fit(X_train, y_train)
        print(f"Predicting for {model}...")
        y_pred = model.predict(X_test)
        print(classification_report(y_test, y_pred, target_names=['0', '1']))
        # scikit-learn metrics take (y_true, y_pred) in that order.
        metric_rows.append({
            'model': str(m),
            'accuracy': accuracy_score(y_test, y_pred),
            'precision': precision_score(y_test, y_pred),
            'recall': recall_score(y_test, y_pred),
            'f1': f1_score(y_test, y_pred),
        })
        trained_models.append(model)

    # Build the frame once from the collected rows instead of growing it per model.
    df_metrics = pd.DataFrame(metric_rows, columns=metric_columns)
    df_metrics.to_csv("models.csv", index=False)
    return trained_models



In [None]:
# selected_features = list(set([i.strip() for i in selected_features_maryland]))
# `ms` holds the fitted model instances returned by the training helper.
ms = train_models_and_evaluate(X_train, y_train, X_test, y_test, selected_features_arizona)

Fitting RandomForestClassifier()...
Predicting for RandomForestClassifier()...
              precision    recall  f1-score   support

           0       0.74      0.68      0.71     19356
           1       0.82      0.86      0.84     32602

    accuracy                           0.79     51958
   macro avg       0.78      0.77      0.77     51958
weighted avg       0.79      0.79      0.79     51958

Fitting XGBClassifier()...
Predicting for XGBClassifier()...
              precision    recall  f1-score   support

           0       0.73      0.65      0.69     19356
           1       0.81      0.86      0.83     32602

    accuracy                           0.78     51958
   macro avg       0.77      0.76      0.76     51958
weighted avg       0.78      0.78      0.78     51958



In [None]:
# Unfairness metrics


import sklearn.metrics as skm
import functools
from fairlearn.metrics import equalized_odds_difference, equalized_odds_ratio

# F-beta scorers with beta fixed to 0.6, 0.4 and 0.5 respectively.
fbeta_06, fbeta_04, fbeta_05 = (
    functools.partial(skm.fbeta_score, beta=beta_value)
    for beta_value in (0.6, 0.4, 0.5)
)

# Per-group metrics handed to MetricFrame; keys become the result column names.
metric_fns = dict(
    selection_rate=selection_rate,
    fbeta_06=fbeta_06,
    fbeta_04=fbeta_04,
    fbeta_05=fbeta_05,
    count=count,
)


def unfairness_metrics_evaluation(models, X_test, y_test, prefix="unmitigated",
                                  feature_columns=None):
    """
    Compute fairness metrics for every (model, demographic feature) pair and
    save them as two CSV files.

    Parameters
    ----------
    models : iterable
        Fitted models exposing `predict`.
    X_test : pandas.DataFrame
        Test features. Must contain the model's feature columns and every
        column named in the module-level `demographic_features`.
    y_test : array-like
        True labels for `X_test`.
    prefix : str, default "unmitigated"
        Prefix for the output file names.
    feature_columns : list of str or None, default None
        Columns passed to `model.predict`. None falls back to the
        module-level `selected_features_arizona`.

    Returns
    -------
    None

    Side effects
    ------------
    Writes "{prefix}_Unfairness_metrics.csv" (per-group values of `metric_fns`)
    and "{prefix}_Equalized_odds.csv" (between-group disparity metrics) to the
    working directory, and prints the per-group table for each pair.
    """
    by_group_columns = ['categories', 'demographic_feature', 'model',
                        'selection_rate', 'fbeta_06', 'fbeta_04', 'fbeta_05',
                        'count']
    # Disparity metrics, keyed by output column name (insertion order = CSV order).
    disparity_fns = {
        'equalized_odds_difference': equalized_odds_difference,
        'equalized_odds_ratio': equalized_odds_ratio,
        'demographic_parity_ratio': demographic_parity_ratio,
        'demographic_parity_difference': demographic_parity_difference,
        'false_positive_rate_difference': false_positive_rate_difference,
        'false_negative_rate_difference': false_negative_rate_difference,
        'true_positive_rate_difference': true_positive_rate_difference,
        'true_negative_rate_difference': true_negative_rate_difference,
    }
    disparity_columns = ['demographic_feature', 'model'] + list(disparity_fns)

    if feature_columns is None:
        feature_columns = selected_features_arizona

    by_group_frames = []
    disparity_rows = []
    for m in models:
        y_pred = m.predict(X_test[feature_columns])
        for d in demographic_features:
            print("Model:", m)
            print("Demographic feature", d)
            sensitive = X_test[[d]]

            results = MetricFrame(metrics=metric_fns,
                                  y_true=y_test,
                                  y_pred=y_pred,
                                  sensitive_features=sensitive)
            results = results.by_group.reset_index()
            # `columns=` is required: a bare mapping renames index labels, not
            # columns, which left 'categories' empty.
            results = results.rename(columns={str(d): 'categories'})
            results['model'] = str(m)
            results['demographic_feature'] = str(d)
            print(results)
            by_group_frames.append(results)

            # Store the computed values (not the metric functions themselves).
            row = {'demographic_feature': str(d), 'model': str(m)}
            for column, metric in disparity_fns.items():
                row[column] = metric(y_test, y_pred, sensitive_features=sensitive)
            disparity_rows.append(row)

    # Assemble each table once; DataFrame.append no longer exists in pandas 2.
    if by_group_frames:
        df_unfairness_metrics = pd.concat(by_group_frames, ignore_index=True)
        extra_columns = [c for c in df_unfairness_metrics.columns
                         if c not in by_group_columns]
        df_unfairness_metrics = df_unfairness_metrics.reindex(
            columns=by_group_columns + extra_columns)
    else:
        df_unfairness_metrics = pd.DataFrame(columns=by_group_columns)
    df_equalized_odds = pd.DataFrame(disparity_rows, columns=disparity_columns)

    df_equalized_odds.to_csv(f"{prefix}_Equalized_odds.csv", index=False)
    df_unfairness_metrics.to_csv(f"{prefix}_Unfairness_metrics.csv", index=False)
    return





In [None]:
# Fairness metrics for the unmitigated models; with the default prefix the
# results go to unmitigated_Equalized_odds.csv and unmitigated_Unfairness_metrics.csv.
unfairness_metrics_evaluation(ms, X_test[selected_features_arizona], y_test)

Model: RandomForestClassifier()
Demographic feature RACE
result
result2
   RACE selection_rate  fbeta_06  fbeta_04  fbeta_05  count  \
0     0       0.604096  0.792701  0.786402  0.789474    586   
1     1       0.621649  0.812596  0.809809  0.811171   3880   
2     2       0.653491   0.83321   0.82924  0.831179   8349   
3     3       0.700422  0.870468  0.866124  0.868245    474   
4     4       0.683608  0.842763  0.834738  0.838648   2794   
5     5           0.84  0.741911  0.716354  0.728643     50   
6     6       0.660181  0.825587  0.820697  0.823084  35825   

                      model demographic_feature  
0  RandomForestClassifier()                RACE  
1  RandomForestClassifier()                RACE  
2  RandomForestClassifier()                RACE  
3  RandomForestClassifier()                RACE  
4  RandomForestClassifier()                RACE  
5  RandomForestClassifier()                RACE  
6  RandomForestClassifier()                RACE  
result1
result4
result5

# Evaluating Unfairness Metrics in each Feature


In [None]:
# Feature matrix, target and the sensitive-attribute columns that are split
# alongside them so each group vector stays row-aligned with X_test / y_test.
X = df[selected_features_arizona]
y = df['LOS_category']
A = df['FEMALE']
B = df['MARITALSTATUSUB04']
C = df['AGE']
D = df['RACE']
E = df['ZIPINC_QRTL']
F = df['MEDINCSTQ']
G = df['Homeless']
F2 = df['PAY1']
A1 = df[['FEMALE', 'RACE']]
# 'MARITALSTATUSUB04','PAY1','RACE','ZIPINC_QRTL'

# NOTE: this rebinds X_train/X_test/y_train/y_test, replacing the split that the
# models in `ms` were trained on. random_state is fixed (same seed as the split
# in clean_and_preprocess) so the stratified split is reproducible across runs.
(X_train, X_test, y_train, y_test,
 A_train, A_test,
 B_train, B_test,
 C_train, C_test,
 D_train, D_test,
 E_train, E_test,
 F_train, F_test,
 G_train, G_test,
 A1_train, A1_test,
 F2_train, F2_test) = train_test_split(X, y,
                                       A, B, C, D, E, F, G, A1, F2,
                                       stratify=y,
                                       test_size=0.3,
                                       random_state=42)

In [None]:
#Using XGBoost Classifier

In [None]:
# # Fairlearn algorithms and utils
# from fairlearn.postprocessing import ThresholdOptimizer
# from fairlearn.reductions import GridSearch, EqualizedOdds

# Metrics
from fairlearn.metrics import (
    MetricFrame,
    selection_rate, demographic_parity_difference, demographic_parity_ratio,
    false_positive_rate, false_negative_rate,
    false_positive_rate_difference, false_negative_rate_difference,
    equalized_odds_difference)
from sklearn.metrics import balanced_accuracy_score, roc_auc_score

#Using XGBoost Classifier

In [None]:
#Model Training 
# Boosted decision stumps (max_depth=1) with a fixed seed.
clf = XGBClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0).fit(X_train, y_train)
# The former bare `clf.score(X_test, y_test)` call was dropped: its result was
# discarded, and the report below already includes accuracy.
y_pred = pd.DataFrame(clf.predict(X_test),columns=["Passed"])
target_names=['0','1']
print(classification_report(y_test, y_pred, target_names=target_names))

              precision    recall  f1-score   support

           0       0.72      0.68      0.70     19206
           1       0.82      0.84      0.83     32752

    accuracy                           0.78     51958
   macro avg       0.77      0.76      0.76     51958
weighted avg       0.78      0.78      0.78     51958



In [None]:
# Probability of class 1 for every held-out row.
test_scores = clf.predict_proba(X_test)[:, 1]
# Cut-off is the mean of the training labels (the positive rate if labels are
# 0/1) instead of a fixed 0.5; scores at or above it are predicted as 1.
base_rate = np.mean(y_train)
test_preds = np.where(test_scores >= base_rate, 1, 0)
# Train AUC
train_scores = clf.predict_proba(X_train)[:, 1]
roc_auc_score(y_train, train_scores)

0.8453802626210235

# Female


In [None]:
# False positive / false negative rates of the thresholded predictions, broken
# down by the FEMALE column. Arguments are passed by keyword, matching the other
# MetricFrame calls in this notebook (positional use is deprecated in fairlearn).
mf = MetricFrame(
    metrics={
        'FPR': false_positive_rate,
        'FNR': false_negative_rate},
    y_true=y_test,
    y_pred=test_preds,
    sensitive_features=A_test)

mf.by_group

Unnamed: 0_level_0,FPR,FNR
FEMALE,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0.244972,0.215799
1,0.207883,0.250292
2,0.0,0.0


In [None]:
# Helper functions
def get_metrics_df(models_dict, y_true, group):
    """Build a table of performance and fairness metrics, one column per model.

    Parameters
    ----------
    models_dict : dict
        Maps a model name to a ``(preds, scores)`` pair. ``preds`` is fed to
        every prediction-based metric; ``scores`` is fed to the two AUC rows.
    y_true : array-like
        Ground-truth labels the predictions are compared against.
    group : array-like
        Passed as ``sensitive_features`` to every group-aware metric.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``metrics_dict`` (in definition order, including
        the blank separator rows) and one column per key of ``models_dict``.
    """
    # Each entry is (metric callable, use_preds). use_preds=True means the
    # callable receives the hard predictions, False means it receives scores.
    # Separator rows return "" and differ only by leading spaces because dict
    # keys must be unique; a repeated key would silently replace the earlier
    # entry instead of adding a row.
    metrics_dict = {
        "Overall selection rate": (
            lambda x: selection_rate(y_true, x), True),
        "Demographic parity difference": (
            lambda x: demographic_parity_difference(y_true, x, sensitive_features=group), True),
        "Demographic parity ratio": (
            lambda x: demographic_parity_ratio(y_true, x, sensitive_features=group), True),
        "------": (lambda x: "", True),
        "Overall balanced error rate": (
            lambda x: 1-balanced_accuracy_score(y_true, x), True),
        # error = 1 - accuracy, so the between-group gap in balanced accuracy
        # equals the between-group gap in balanced error rate.
        "Balanced error rate difference": (
            lambda x: MetricFrame(metrics=balanced_accuracy_score, y_true=y_true, y_pred=x, sensitive_features=group).difference(method='between_groups'), True),
        " ------": (lambda x: "", True),
        "False positive rate difference": (
            lambda x: false_positive_rate_difference(y_true, x, sensitive_features=group), True),
        "False negative rate difference": (
            lambda x: false_negative_rate_difference(y_true, x, sensitive_features=group), True),
        "Equalized odds difference": (
            lambda x: equalized_odds_difference(y_true, x, sensitive_features=group), True),
        "  ------": (lambda x: "", True),
        "Overall AUC": (
            lambda x: roc_auc_score(y_true, x), False),
        "AUC difference": (
            lambda x: MetricFrame(metrics=roc_auc_score, y_true=y_true, y_pred=x, sensitive_features=group).difference(method='between_groups'), False),
    }
    df_dict = {}
    for metric_name, (metric_func, use_preds) in metrics_dict.items():
        # Evaluate the metric once per model, on preds or scores as flagged.
        df_dict[metric_name] = [metric_func(preds if use_preds else scores)
                                for preds, scores in models_dict.values()]
    return pd.DataFrame.from_dict(df_dict, orient="index", columns=models_dict.keys())

# Female

In [None]:
# Metrics of the unmitigated model, with groups taken from A_test.
models_dict = dict(Unmitigated=(test_preds, test_scores))
get_metrics_df(models_dict, y_test, A_test)

Unnamed: 0,Unmitigated
Overall selection rate,0.563994
Demographic parity difference,0.096128
Demographic parity ratio,0.838746
------,
Overall balanced error rate,0.228947
Balanced error rate difference,0.230385
------,
False positive rate difference,0.244972
False negative rate difference,0.250292
Equalized odds difference,0.250292


# Marital Status

In [None]:
# False-positive and false-negative rates of the unmitigated predictions,
# broken down by B_test (the recorded output labels this index
# MARITALSTATUSUB04). Arguments are passed by keyword, matching the
# MetricFrame calls inside get_metrics_df.
mf = MetricFrame(
    metrics={
        'FPR': false_positive_rate,
        'FNR': false_negative_rate},
    y_true=y_test,
    y_pred=test_preds,
    sensitive_features=B_test)

mf.by_group

Unnamed: 0_level_0,FPR,FNR
MARITALSTATUSUB04,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0.240725,0.252376
1,0.299895,0.183259
2,0.2,0.234463
3,0.255906,0.214612
4,0.186483,0.260451
5,0.289524,0.221487


In [None]:
# Metrics of the unmitigated model, with groups taken from B_test.
models_dict = dict(Unmitigated=(test_preds, test_scores))
get_metrics_df(models_dict, y_test, B_test)

Unnamed: 0,Unmitigated
Overall selection rate,0.563994
Demographic parity difference,0.140843
Demographic parity ratio,0.786591
------,
Overall balanced error rate,0.228947
Balanced error rate difference,0.038274
------,
False positive rate difference,0.113412
False negative rate difference,0.077192
Equalized odds difference,0.113412


# AGE

In [None]:
# False-positive and false-negative rates of the unmitigated predictions,
# broken down by C_test (the recorded output labels this index AGE).
# Arguments are passed by keyword, matching the MetricFrame calls inside
# get_metrics_df.
mf = MetricFrame(
    metrics={
        'FPR': false_positive_rate,
        'FNR': false_negative_rate},
    y_true=y_test,
    y_pred=test_preds,
    sensitive_features=C_test)

mf.by_group

Unnamed: 0_level_0,FPR,FNR
AGE,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0.107413,0.352442
1,0.235499,0.226061


In [None]:
# Metrics of the unmitigated model, with groups taken from C_test.
models_dict = dict(Unmitigated=(test_preds, test_scores))
get_metrics_df(models_dict, y_test, C_test)

Unnamed: 0,Unmitigated
Overall selection rate,0.563994
Demographic parity difference,0.171299
Demographic parity ratio,0.703993
------,
Overall balanced error rate,0.228947
Balanced error rate difference,0.000852
------,
False positive rate difference,0.128086
False negative rate difference,0.126381
Equalized odds difference,0.128086


## RACE

In [None]:
# False-positive and false-negative rates of the unmitigated predictions,
# broken down by D_test (the recorded output labels this index RACE).
# Arguments are passed by keyword, matching the MetricFrame calls inside
# get_metrics_df.
mf = MetricFrame(
    metrics={
        'FPR': false_positive_rate,
        'FNR': false_negative_rate},
    y_true=y_test,
    y_pred=test_preds,
    sensitive_features=D_test)

mf.by_group


Unnamed: 0_level_0,FPR,FNR
RACE,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0.20202,0.322767
1,0.225968,0.237527
2,0.216405,0.22403
3,0.2,0.204545
4,0.221074,0.239957
5,0.6,0.076923
6,0.223261,0.237138


In [None]:
# Metrics of the unmitigated model, with groups taken from D_test.
models_dict = dict(Unmitigated=(test_preds, test_scores))
get_metrics_df(models_dict, y_test, D_test)

Unnamed: 0,Unmitigated
Overall selection rate,0.563994
Demographic parity difference,0.308972
Demographic parity ratio,0.620222
------,
Overall balanced error rate,0.228947
Balanced error rate difference,0.136189
------,
False positive rate difference,0.4
False negative rate difference,0.245843
Equalized odds difference,0.4


# ZIPINC_QRTL

In [None]:
# False-positive and false-negative rates of the unmitigated predictions,
# broken down by E_test (the recorded output labels this index ZIPINC_QRTL).
# Arguments are passed by keyword, matching the MetricFrame calls inside
# get_metrics_df.
mf = MetricFrame(
    metrics={
        'FPR': false_positive_rate,
        'FNR': false_negative_rate},
    y_true=y_test,
    y_pred=test_preds,
    sensitive_features=E_test)

mf.by_group

Unnamed: 0_level_0,FPR,FNR
ZIPINC_QRTL,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0.211381,0.239272
1,0.241477,0.227794
2,0.217983,0.237765
3,0.233115,0.235104
4,0.199605,0.245321


In [None]:
# Metrics of the unmitigated model, with groups taken from E_test.
models_dict = dict(Unmitigated=(test_preds, test_scores))
get_metrics_df(models_dict, y_test, E_test)

Unnamed: 0,Unmitigated
Overall selection rate,0.563994
Demographic parity difference,0.04974
Demographic parity ratio,0.914306
------,
Overall balanced error rate,0.228947
Balanced error rate difference,0.012173
------,
False positive rate difference,0.041873
False negative rate difference,0.017527
Equalized odds difference,0.041873


# Financial Group 

# MEDINCSTQ

In [None]:
# False-positive and false-negative rates of the unmitigated predictions,
# broken down by F_test (the recorded output labels this index MEDINCSTQ).
# Arguments are passed by keyword, matching the MetricFrame calls inside
# get_metrics_df.
mf = MetricFrame(
    metrics={
        'FPR': false_positive_rate,
        'FNR': false_negative_rate},
    y_true=y_test,
    y_pred=test_preds,
    sensitive_features=F_test)

mf.by_group

Unnamed: 0_level_0,FPR,FNR
MEDINCSTQ,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0.20316,0.24257
1,0.245315,0.230458
2,0.227994,0.228835
3,0.228942,0.235294
4,0.0,0.333333
5,0.199802,0.245144


In [None]:
# Metrics of the unmitigated model, with groups taken from F_test.
models_dict = dict(Unmitigated=(test_preds, test_scores))
get_metrics_df(models_dict, y_test, F_test)

Unnamed: 0,Unmitigated
Overall selection rate,0.563994
Demographic parity difference,0.081635
Demographic parity ratio,0.859646
------,
Overall balanced error rate,0.228947
Balanced error rate difference,0.07122
------,
False positive rate difference,0.245315
False negative rate difference,0.104498
Equalized odds difference,0.245315


# Homeless

In [None]:
# False-positive and false-negative rates of the unmitigated predictions,
# broken down by G_test (the recorded output labels this index Homeless).
# Arguments are passed by keyword, matching the MetricFrame calls inside
# get_metrics_df.
mf = MetricFrame(
    metrics={
        'FPR': false_positive_rate,
        'FNR': false_negative_rate},
    y_true=y_test,
    y_pred=test_preds,
    sensitive_features=G_test)

mf.by_group

Unnamed: 0_level_0,FPR,FNR
Homeless,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0.196117,0.30303
1,0.222995,0.234444


In [None]:
# Metrics of the unmitigated model, with groups taken from G_test.
models_dict = dict(Unmitigated=(test_preds, test_scores))
get_metrics_df(models_dict, y_test, G_test)

Unnamed: 0,Unmitigated
Overall selection rate,0.563994
Demographic parity difference,0.109002
Demographic parity ratio,0.807502
------,
Overall balanced error rate,0.228947
Balanced error rate difference,0.020854
------,
False positive rate difference,0.026879
False negative rate difference,0.068586
Equalized odds difference,0.068586


# PAY1

In [None]:
# False-positive and false-negative rates of the unmitigated predictions,
# broken down by F2_test (the recorded output labels this index PAY1).
# Arguments are passed by keyword, matching the MetricFrame calls inside
# get_metrics_df.
mf = MetricFrame(
    metrics={
        'FPR': false_positive_rate,
        'FNR': false_negative_rate},
    y_true=y_test,
    y_pred=test_preds,
    sensitive_features=F2_test)

mf.by_group

Unnamed: 0_level_0,FPR,FNR
PAY1,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0.236723,0.247687
1,0.210556,0.235779
2,0.218148,0.229991
3,0.252772,0.167484
4,0.170732,0.426829
5,0.253205,0.212522


In [None]:
# Metrics of the unmitigated model, with groups taken from F2_test.
models_dict = dict(Unmitigated=(test_preds, test_scores))
get_metrics_df(models_dict, y_test, F2_test)

Unnamed: 0,Unmitigated
Overall selection rate,0.563994
Demographic parity difference,0.237394
Demographic parity ratio,0.649043
------,
Overall balanced error rate,0.228947
Balanced error rate difference,0.088653
------,
False positive rate difference,0.082473
False negative rate difference,0.259346
Equalized odds difference,0.259346


# Female and Race

In [None]:
# False-positive and false-negative rates of the unmitigated predictions,
# broken down by A1_test (the recorded output shows a two-level index,
# FEMALE x RACE). Arguments are passed by keyword, matching the MetricFrame
# calls inside get_metrics_df.
mf = MetricFrame(
    metrics={
        'FPR': false_positive_rate,
        'FNR': false_negative_rate},
    y_true=y_test,
    y_pred=test_preds,
    sensitive_features=A1_test)

mf.by_group



Unnamed: 0_level_0,Unnamed: 1_level_0,FPR,FNR
FEMALE,RACE,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,0.179104,0.30597
0,1,0.270633,0.210191
0,2,0.245632,0.196474
0,3,0.232558,0.188679
0,4,0.222543,0.220196
0,5,0.666667,0.071429
0,6,0.244505,0.219121
1,0,0.21374,0.333333
1,1,0.199313,0.25588
1,2,0.202369,0.240606


In [None]:
# Metrics of the unmitigated model, with groups taken from A1_test.
models_dict = dict(Unmitigated=(test_preds, test_scores))
get_metrics_df(models_dict, y_test, A1_test)



Unnamed: 0,Unmitigated
Overall selection rate,0.563994
Demographic parity difference,0.388167
Demographic parity ratio,0.560078
------,
Overall balanced error rate,0.228947
Balanced error rate difference,0.369048
------,
False positive rate difference,0.666667
False negative rate difference,0.333333
Equalized odds difference,0.666667


# Mitigating Equalized Odds Difference in the RACE feature with the postprocessing algorithm ThresholdOptimizer


In [None]:
# Post-processing mitigator around the already-fitted clf (prefit=True).
postprocess_est = ThresholdOptimizer(estimator=clf, constraints="equalized_odds", prefit=True)

# Index used to fit the thresholds: every y_train == 1 row, united with a
# seeded sample (with replacement, same size) of the y_train == 0 rows.
balanced_idx1 = X_train.loc[y_train == 1].index
negative_sample = y_train.loc[y_train == 0].sample(
    n=balanced_idx1.size, replace=True, random_state=1234)
pp_train_idx = balanced_idx1.union(negative_sample.index)

# Features, target and the D_train sensitive feature restricted to that index.
X_train_balanced = X_train.loc[pp_train_idx, :]
y_train_balanced = y_train.loc[pp_train_idx]
D_train_balanced = D_train.loc[pp_train_idx]

In [None]:
# Fit the post-processing thresholds on the balanced subset, with
# D_train_balanced as the sensitive feature.
postprocess_est.fit(X_train_balanced, y_train_balanced, sensitive_features=D_train_balanced)



ThresholdOptimizer(constraints='equalized_odds',
                   estimator=XGBClassifier(learning_rate=1.0, max_depth=1),
                   prefit=True)

In [None]:
# Mitigated predictions for the test split; the sensitive feature (D_test)
# has to be supplied again at prediction time.
postprocess_preds = postprocess_est.predict(X_test, sensitive_features=D_test)

In [None]:
# Side-by-side metrics, grouped by D_test. The ThresholdOptimizer entry passes
# the same prediction array in both the preds and the scores slot.
models_dict = dict(
    Unmitigated=(test_preds, test_scores),
    ThresholdOptimizer=(postprocess_preds, postprocess_preds),
)
get_metrics_df(models_dict, y_test, D_test)

Unnamed: 0,Unmitigated,ThresholdOptimizer
Overall selection rate,0.563994,0.675815
Demographic parity difference,0.308972,0.115441
Demographic parity ratio,0.620222,0.851935
------,,
Overall balanced error rate,0.228947,0.292316
Balanced error rate difference,0.136189,0.111814
------,,
False positive rate difference,0.4,0.247199
False negative rate difference,0.245843,0.04625
Equalized odds difference,0.4,0.247199


# Mitigating Equalized Odds Difference in the MEDINCSTQ feature with the postprocessing algorithm ThresholdOptimizer

In [None]:
# Post-processing mitigator around the already-fitted clf (prefit=True).
postprocess_est = ThresholdOptimizer(estimator=clf, constraints="equalized_odds", prefit=True)

# Index used to fit the thresholds: every y_train == 1 row, united with a
# seeded sample (with replacement, same size) of the y_train == 0 rows.
balanced_idx1 = X_train.loc[y_train == 1].index
negative_sample = y_train.loc[y_train == 0].sample(
    n=balanced_idx1.size, replace=True, random_state=1234)
pp_train_idx = balanced_idx1.union(negative_sample.index)

# Features, target and the F_train sensitive feature restricted to that index.
X_train_balanced = X_train.loc[pp_train_idx, :]
y_train_balanced = y_train.loc[pp_train_idx]
F_train_balanced = F_train.loc[pp_train_idx]

In [None]:
# Fit the post-processing thresholds on the balanced subset, with
# F_train_balanced as the sensitive feature.
postprocess_est.fit(X_train_balanced, y_train_balanced, sensitive_features=F_train_balanced)




ThresholdOptimizer(constraints='equalized_odds',
                   estimator=XGBClassifier(learning_rate=1.0, max_depth=1),
                   prefit=True)

In [None]:
# Mitigated predictions for the test split; the sensitive feature (F_test)
# has to be supplied again at prediction time.
postprocess_preds = postprocess_est.predict(X_test, sensitive_features=F_test)

In [None]:
# Side-by-side metrics, grouped by F_test. The ThresholdOptimizer entry passes
# the same prediction array in both the preds and the scores slot.
models_dict = dict(
    Unmitigated=(test_preds, test_scores),
    ThresholdOptimizer=(postprocess_preds, postprocess_preds),
)
get_metrics_df(models_dict, y_test, F_test)

Unnamed: 0,Unmitigated,ThresholdOptimizer
Overall selection rate,0.563994,0.647831
Demographic parity difference,0.081635,0.403004
Demographic parity ratio,0.859646,0.382846
------,,
Overall balanced error rate,0.228947,0.248929
Balanced error rate difference,0.07122,0.094699
------,,
False positive rate difference,0.245315,0.338404
False negative rate difference,0.104498,0.505916
Equalized odds difference,0.245315,0.505916


# Mitigating Equalized Odds Difference in the PAY1 feature with the postprocessing algorithm ThresholdOptimizer

In [None]:
# Post-processing mitigator around the already-fitted clf (prefit=True).
postprocess_est = ThresholdOptimizer(estimator=clf, constraints="equalized_odds", prefit=True)

# Index used to fit the thresholds: every y_train == 1 row, united with a
# seeded sample (with replacement, same size) of the y_train == 0 rows.
balanced_idx1 = X_train.loc[y_train == 1].index
negative_sample = y_train.loc[y_train == 0].sample(
    n=balanced_idx1.size, replace=True, random_state=1234)
pp_train_idx = balanced_idx1.union(negative_sample.index)

# Features, target and the F2_train sensitive feature restricted to that index.
X_train_balanced = X_train.loc[pp_train_idx, :]
y_train_balanced = y_train.loc[pp_train_idx]
F2_train_balanced = F2_train.loc[pp_train_idx]

In [None]:
# Fit the post-processing thresholds on the balanced subset, with
# F2_train_balanced as the sensitive feature.
postprocess_est.fit(X_train_balanced, y_train_balanced, sensitive_features=F2_train_balanced)




ThresholdOptimizer(constraints='equalized_odds',
                   estimator=XGBClassifier(learning_rate=1.0, max_depth=1),
                   prefit=True)

In [None]:
# Mitigated test predictions (sensitive feature: F2_test), then side-by-side
# metrics grouped by F2_test. The ThresholdOptimizer entry passes the same
# prediction array in both the preds and the scores slot.
postprocess_preds = postprocess_est.predict(X_test, sensitive_features=F2_test)
models_dict = dict(
    Unmitigated=(test_preds, test_scores),
    ThresholdOptimizer=(postprocess_preds, postprocess_preds),
)
get_metrics_df(models_dict, y_test, F2_test)

Unnamed: 0,Unmitigated,ThresholdOptimizer
Overall selection rate,0.563994,0.617326
Demographic parity difference,0.237394,0.125048
Demographic parity ratio,0.649043,0.806239
------,,
Overall balanced error rate,0.228947,0.282129
Balanced error rate difference,0.088653,0.018779
------,,
False positive rate difference,0.082473,0.132458
False negative rate difference,0.259346,0.124683
Equalized odds difference,0.259346,0.132458


# Mitigating Equalized Odds Difference in the combined Female and Race feature with the postprocessing algorithm ThresholdOptimizer

In [None]:
# Post-processing mitigator around the already-fitted clf (prefit=True).
postprocess_est = ThresholdOptimizer(estimator=clf, constraints="equalized_odds", prefit=True)

# Index used to fit the thresholds: every y_train == 1 row, united with a
# seeded sample (with replacement, same size) of the y_train == 0 rows.
balanced_idx1 = X_train.loc[y_train == 1].index
negative_sample = y_train.loc[y_train == 0].sample(
    n=balanced_idx1.size, replace=True, random_state=1234)
pp_train_idx = balanced_idx1.union(negative_sample.index)

# Features, target and the A1_train sensitive features restricted to that index.
X_train_balanced = X_train.loc[pp_train_idx, :]
y_train_balanced = y_train.loc[pp_train_idx]
A1_train_balanced = A1_train.loc[pp_train_idx]

In [None]:
# Fit the post-processing thresholds on the balanced subset, with
# A1_train_balanced as the sensitive features.
# NOTE(review): the recorded output of this cell is a ValueError, so the fit
# did not complete in the saved run. The message is not preserved here, so the
# cause still has to be diagnosed.
postprocess_est.fit(X_train_balanced, y_train_balanced, sensitive_features=A1_train_balanced)




ValueError: ignored

In [None]:
# Mitigated test predictions (sensitive features: A1_test), then side-by-side
# metrics grouped by A1_test.
# NOTE(review): the recorded output of this cell is an AttributeError,
# presumably because the preceding fit did not succeed -- confirm once that
# fit is fixed.
postprocess_preds = postprocess_est.predict(X_test, sensitive_features=A1_test)
models_dict = dict(
    Unmitigated=(test_preds, test_scores),
    ThresholdOptimizer=(postprocess_preds, postprocess_preds),
)
get_metrics_df(models_dict, y_test, A1_test)

AttributeError: ignored

# After using ThresholdOptimizer, we will apply GridSearch to mitigate bias

In [None]:
# Restrict the features, the target and each sensitive-feature object to the
# rows selected by pp_train_idx, in one pass. No G_train_balanced is created.
(X_train_balanced, y_train_balanced,
 A_train_balanced, B_train_balanced, C_train_balanced, D_train_balanced,
 E_train_balanced, F_train_balanced, F2_train_balanced, A1_train_balanced) = (
    part.loc[pp_train_idx]
    for part in (X_train, y_train,
                 A_train, B_train, C_train, D_train,
                 E_train, F_train, F2_train, A1_train))

In [None]:
# General imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Data processing
from sklearn.model_selection import train_test_split

# # Models
# import lightgbm as lgb
# from sklearn.calibration import CalibratedClassifierCV

# Fairlearn algorithms and utils
from fairlearn.postprocessing import ThresholdOptimizer
from fairlearn.reductions import GridSearch, EqualizedOdds

# Metrics
from fairlearn.metrics import (
    MetricFrame,
    selection_rate, demographic_parity_difference, demographic_parity_ratio,
    false_positive_rate, false_negative_rate,
    false_positive_rate_difference, false_negative_rate_difference,
    equalized_odds_difference)
from sklearn.metrics import balanced_accuracy_score, roc_auc_score

In [None]:
# Train GridSearch: a sweep of 50 candidate models under an equalized-odds
# constraint, fitted on the balanced subset with D_train_balanced as the
# sensitive feature.
equalized_odds_constraint = EqualizedOdds()
sweep = GridSearch(
    clf,
    constraints=equalized_odds_constraint,
    grid_size=50,
    grid_limit=3,
)

sweep.fit(X_train_balanced, y_train_balanced, sensitive_features=D_train_balanced)



In [None]:
# Hard predictions and positive-class probabilities on the test split for
# every predictor produced by the sweep, in sweep.predictors_ order.
sweep_preds, sweep_scores = [], []
for predictor in sweep.predictors_:
    sweep_preds.append(predictor.predict(X_test))
    sweep_scores.append(predictor.predict_proba(X_test)[:, 1])

In [None]:
# Per-predictor test metrics: equalized-odds difference over D_test, balanced
# accuracy, and AUC (the last from the probability scores).
equalized_odds_sweep = []
balanced_accuracy_sweep = []
for preds in sweep_preds:
    equalized_odds_sweep.append(
        equalized_odds_difference(y_test, preds, sensitive_features=D_test))
    balanced_accuracy_sweep.append(balanced_accuracy_score(y_test, preds))
auc_sweep = [roc_auc_score(y_test, candidate_scores) for candidate_scores in sweep_scores]

In [None]:
# Select only non-dominated models (with respect to balanced accuracy and equalized odds difference)
all_results = pd.DataFrame({
    "predictor": sweep.predictors_,
    "accuracy": balanced_accuracy_sweep,
    "disparity": equalized_odds_sweep,
})

# A model is kept when no model with lower-or-equal disparity has a strictly
# higher accuracy.
non_dominated = []
for row in all_results.itertuples():
    rivals_accuracy = all_results.loc[all_results["disparity"] <= row.disparity, "accuracy"]
    non_dominated.append(bool(row.accuracy >= rivals_accuracy.max()))

# Metric arrays restricted to the non-dominated models.
equalized_odds_sweep_non_dominated = np.asarray(equalized_odds_sweep)[non_dominated]
balanced_accuracy_non_dominated = np.asarray(balanced_accuracy_sweep)[non_dominated]
auc_non_dominated = np.asarray(auc_sweep)[non_dominated]

In [None]:
# Compare GridSearch models with low values of equalized odds difference with the previously constructed models
grid_search_dict = {"GridSearch_{}".format(i): (sweep_preds[i], sweep_scores[i])
                    for i in range(len(sweep_preds))
                    if non_dominated[i] and equalized_odds_sweep[i]<0.1}

# Build the comparison dict explicitly instead of updating whatever
# models_dict an earlier cell left behind. That leftover dict holds the
# ThresholdOptimizer predictions of the last post-processing cell that ran,
# which was fitted for a different sensitive feature than the D_test groups
# used below. The optimizer is refitted here on the same sensitive feature
# the sweep was trained with (D_train_balanced), so every column of the table
# refers to the same grouping.
race_postprocess_est = ThresholdOptimizer(
    estimator=clf,
    constraints="equalized_odds",
    prefit=True)
race_postprocess_est.fit(X_train_balanced, y_train_balanced, sensitive_features=D_train_balanced)
race_postprocess_preds = race_postprocess_est.predict(X_test, sensitive_features=D_test)

models_dict = {"Unmitigated": (test_preds, test_scores),
               "ThresholdOptimizer": (race_postprocess_preds, race_postprocess_preds)}
models_dict.update(grid_search_dict)
get_metrics_df(models_dict, y_test, D_test)

  result = self.group_min() / self.group_max()


Unnamed: 0,Unmitigated,ThresholdOptimizer,GridSearch_22,GridSearch_38
Overall selection rate,0.563994,0.617326,1.0,0.0
Demographic parity difference,0.308972,0.28005,0.0,0.0
Demographic parity ratio,0.620222,0.662797,1.0,
------,,,,
Overall balanced error rate,0.228947,0.282129,0.5,0.5
Balanced error rate difference,0.136189,0.131317,0.0,0.0
------,,,,
False positive rate difference,0.4,0.39697,0.0,0.0
False negative rate difference,0.245843,0.205793,0.0,0.0
Equalized odds difference,0.4,0.39697,0.0,0.0


In [None]:
# # Unfairness mitigation
# # To do: 
# # 1. Add metric computation after each model training. 
# # 1. Add save to file after each metric computation. 
# # 2. Run and check the results. 

# # Set the model to which the model mitigation technique needs to be applied. 
# estimator = RandomForestClassifier
# mitigated_result = pd.DataFrame
# def unfairness_mitigation(model_to_train, X_train, y_train, X_test, y_test):
#     """
#     Returns a list of models which are mitigated.  
#     """
#     # To do: 
#     # 1. Add save results to csv. 
#     mitigated_models = []
#     mitigated = ThresholdOptimizer(estimator=RandomForestClassifier(), constraints='equalized_odds',
#                                    objective='accuracy_score', grid_size=1000,
#                                    flip=False, prefit=False,
#                                    predict_method='deprecated')
#     mitigated.fit(X_train, y_train, sensitive_features=X_train[['ZIPINC_QRTL']])
#     y_pred = mitigated.predict(X_test, sensitive_features=X_train[['ZIPINC_QRTL']])
#     mitigated_models.append(mitigated)

#     mitigated_models.append(mitigated)
#     unfairness_metrics_evaluation([mitigated], X_test, y_test)
    
#     mitigated = ThresholdOptimizer(estimator=RandomForestClassifier(), constraints='demographic_parity',
#                                    objective='accuracy_score', grid_size=1000,
#                                    flip=False, prefit=False,
#                                    predict_method='deprecated')
#     mitigated.fit(X_train, y_train)
#     y_pred = mitigated.predict(X_test)

#     mitigated_models.append(mitigated)
#     unfairness_metrics_evaluation([mitigated], X_test, y_test)

#     moment = DemographicParity()
#     mitigated = ExponentiatedGradient(estimator=RandomForestClassifier(), constraints=moment)
#     mitigated.fit(X_train, y_train, sensitive_features=X_train[['ZIPINC_QRTL']])
#     y_pred = mitigated.predict(X_test)
#     mitigated_models.append(mitigated)

#     moment = DemographicParity()
#     mitigated = ExponentiatedGradient(estimator=RandomForestClassifier(), constraints=moment)
#     mitigated.fit(X_train, y_train, sensitive_features=X_train[['RACE']])
#     y_pred = mitigated.predict(X_test)
#     mitigated_models.append(mitigated)


#     moment = DemographicParity()
#     mitigated = ExponentiatedGradient(estimator=RandomForestClassifier(), constraints=moment)
#     mitigated.fit(X_train, y_train, sensitive_features=X_train[['AGE']])
#     y_pred = mitigated.predict(X_test)
#     mitigated_models.append(mitigated)


#     moment = DemographicParity()
#     mitigated = ExponentiatedGradient(estimator=RandomForestClassifier(), constraints=moment)
#     mitigated.fit(X_train, y_train, sensitive_features=X_train[['FEMALE']])
#     y_pred = mitigated.predict(X_test)
#     mitigated_models.append(mitigated)

#     moment = DemographicParity()
#     mitigated = ExponentiatedGradient(estimator=RandomForestClassifier(), constraints=moment)
#     mitigated.fit(X_train, y_train, sensitive_features=X_train[['PAY1_X']])
#     y_pred = mitigated.predict(X_test)
#     mitigated_models.append(mitigated)

#     moment = DemographicParity()
#     mitigated = GridSearch(estimator=RandomForestClassifier(), constraints=moment)
#     mitigated.fit(X_train, y_train, sensitive_features=X_train[['ZIPINC_QRTL']])
#     y_pred = mitigated.predict(X_test)
#     mitigated_models.append(mitigated)

#     moment = DemographicParity()
#     mitigated = GridSearch(estimator=RandomForestClassifier(), constraints=moment)
#     mitigated.fit(X_train, y_train, sensitive_features=X_train[['RACE']])
#     y_pred = mitigated.predict(X_test)
#     mitigated_models.append(mitigated)


#     moment = DemographicParity()
#     mitigated = GridSearch(estimator=RandomForestClassifier(), constraints=moment)
#     mitigated.fit(X_train, y_train, sensitive_features=X_train[['AGE']])
#     y_pred = mitigated.predict(X_test)
#     mitigated_models.append(mitigated)

#     moment = DemographicParity()
#     mitigated = GridSearch(estimator=RandomForestClassifier(), constraints=moment)
#     mitigated.fit(X_train, y_train, sensitive_features=X_train[['FEMALE']])
#     y_pred = mitigated.predict(X_test)
#     mitigated_models.append(mitigated)

#     moment = DemographicParity()
#     mitigated = GridSearch(estimator=RandomForestClassifier(), constraints=moment)
#     mitigated.fit(X_train, y_train, sensitive_features=X_train[['PAY1_X']])
#     y_pred = mitigated.predict(X_test)
#     mitigated_models.append(mitigated)

#     unfairness_metrics_evaluation(mitigated_models, X_test, y_test, prefix="mitigated")
    
#     return mitigated_models


In [None]:
# # Unfairness mitigation
# # To do: 
# # 1. Add metric computation after each model training. 
# # 1. Add save to file after each metric computation. 
# # 2. Run and check the results. 

# # Set the model to which the model mitigation technique needs to be applied. 
# m = RandomForestClassifier
# mitigated_result = pd.DataFrame
# mitigated_models = []
# def unfairness_mitigation(model_to_train, X_train, y_train, X_test, y_test):
#     """
#     Returns a list of models which are mitigated.  
#     """
#     # To do: 
#     # 1. Add save results to csv. 
    
#     mitigated = ThresholdOptimizer(estimator=m(), constraints='equalized_odds',
#                                    objective='accuracy_score', grid_size=1000,
#                                    flip=False, prefit=False,
#                                    predict_method='predict') #deprecated
#     mitigated.fit(X_train, y_train, sensitive_features=X_train[['ZIPINC_QRTL']])
#     y_pred = mitigated.predict(X_test, sensitive_features=X_test[['ZIPINC_QRTL']] )
#     # mitigated_models.append(mitigated)
#     # return mitigated_models

#     mitigated_models.append(mitigated)
#     # unfairness_metrics_evaluation([mitigated], X_test, y_test)
    
#     mitigated = ThresholdOptimizer(estimator=m(), constraints='demographic_parity',
#                                    objective='accuracy_score', grid_size=1000,
#                                    flip=False, prefit=False,
#                                    predict_method='predict')
#     x=mitigated.fit(X_train, y_train, sensitive_features=X_train[['ZIPINC_QRTL']])
#     y_pred = mitigated.predict(X_test, sensitive_features=X_test[['ZIPINC_QRTL']])
#     mitigated_models.append(x)
#     unfairness_metrics_evaluation([mitigated], X_test, y_test)

#     moment = DemographicParity()
#     mitigated = ExponentiatedGradient(estimator=m(), constraints=moment)
#     mitigated.fit(X_train, y_train, sensitive_features=X_train[['ZIPINC_QRTL']])
#     y_pred = mitigated.predict(X_test)
#     mitigated_models.append(mitigated)

#     moment = DemographicParity()
#     mitigated = ExponentiatedGradient(estimator=m(), constraints=moment)
#     mitigated.fit(X_train, y_train, sensitive_features=X_train[['RACE']])
#     y_pred = mitigated.predict(X_test)
#     mitigated_models.append(mitigated)


#     moment = DemographicParity()
#     mitigated = ExponentiatedGradient(estimator=m(), constraints=moment)
#     mitigated.fit(X_train, y_train, sensitive_features=X_train[['AGE']])
#     y_pred = mitigated.predict(X_test)
#     mitigated_models.append(mitigated)


#     moment = DemographicParity()
#     mitigated = ExponentiatedGradient(estimator=m(), constraints=moment)
#     mitigated.fit(X_train, y_train, sensitive_features=X_train[['FEMALE']])
#     y_pred = mitigated.predict(X_test)
#     mitigated_models.append(mitigated)

#     moment = DemographicParity()
#     mitigated = ExponentiatedGradient(estimator=m(), constraints=moment)
#     mitigated.fit(X_train, y_train, sensitive_features=X_train[['PAY1_X']])
#     y_pred = mitigated.predict(X_test)
#     mitigated_models.append(mitigated)

#     moment = DemographicParity()
#     mitigated = GridSearch(estimator=m(), constraints=moment)
#     mitigated.fit(X_train, y_train, sensitive_features=X_train[['ZIPINC_QRTL']])
#     y_pred = mitigated.predict(X_test)
#     mitigated_models.append(mitigated)

#     moment = DemographicParity()
#     mitigated = GridSearch(estimator=m(), constraints=moment)
#     mitigated.fit(X_train, y_train, sensitive_features=X_train[['RACE']])
#     y_pred = mitigated.predict(X_test)
#     mitigated_models.append(mitigated)


#     moment = DemographicParity()
#     mitigated = GridSearch(estimator=m(), constraints=moment)
#     mitigated.fit(X_train, y_train, sensitive_features=X_train[['AGE']])
#     y_pred = mitigated.predict(X_test)
#     mitigated_models.append(mitigated)

#     moment = DemographicParity()
#     mitigated = GridSearch(estimator=m(), constraints=moment)
#     mitigated.fit(X_train, y_train, sensitive_features=X_train[['FEMALE']])
#     y_pred = mitigated.predict(X_test)
#     mitigated_models.append(mitigated)

#     moment = DemographicParity()
#     mitigated = GridSearch(estimator=m(), constraints=moment)
#     mitigated.fit(X_train, y_train, sensitive_features=X_train[['PAY1_X']])
#     y_pred = mitigated.predict(X_test)
#     mitigated_models.append(mitigated)

#     unfairness_metrics_evaluation(mitigated_models, X_test, y_test, prefix="mitigated")
    
#     return mitigated_models


In [None]:
# Number of rows in the test split.
len(y_test)

51958

In [None]:
# TabNet dependencies. The `import` keyword must be separated from the
# imported names by a space, otherwise the cell is a SyntaxError.
import pytorch_tabnet
from pytorch_tabnet.tab_model import TabNetClassifier
import torch

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import roc_auc_score, accuracy_score

In [None]:
# TABNET MODEL
# Adam optimizer with a StepLR schedule (the learning rate is multiplied by
# gamma every step_size scheduler steps).
tabnet_optimizer_params = dict(lr=2e-2)
tabnet_scheduler_params = {"step_size": 10, "gamma": 0.9}
tabnet_model = TabNetClassifier(
    optimizer_fn=torch.optim.Adam,
    optimizer_params=tabnet_optimizer_params,
    scheduler_params=tabnet_scheduler_params,
    scheduler_fn=torch.optim.lr_scheduler.StepLR,
    mask_type='entmax',  # "sparsemax"
)

# fit the model
# NOTE(review): the set named 'valid' is the test split (X_test / y_test), so
# any model selection driven by it would be tuned on test data -- confirm this
# is intended.
train_arrays = (X_train.values, y_train.values)
test_arrays = (X_test.values, y_test.values)
tabnet_model.fit(
    train_arrays[0], train_arrays[1],
    eval_set=[train_arrays, test_arrays],
    eval_name=['train', 'valid'],
    eval_metric=['auc', 'accuracy'],
    max_epochs=50,
    patience=50,
    batch_size=256,
    virtual_batch_size=128,
    num_workers=0,
    weights=1,
    drop_last=False,
)

# unfairness_metrics_evaluation is not defined in this part of the notebook.
unfairness_metrics_evaluation([tabnet_model], X_test, y_test)