In [1]:
from sklearn.model_selection import cross_val_score, train_test_split, cross_validate
from fairlearn.datasets import fetch_adult
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.compose import make_column_selector as selector
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from lightgbm import LGBMClassifier

from concurrent.futures import ThreadPoolExecutor
import tqdm as notebook_tqdm
from metrics import (
    equality_opportunity_difference,
    predictive_equality_difference,
    predictive_parity_difference,
    metrics,
    average_absolute_odds_difference,
    metric_evaluation, 
    get_metric_evaluation,
    
)
from fairlearn.metrics import demographic_parity_difference
from sklearn.utils import resample

import numpy as np
import pandas as pd
import optuna
import dill
import pickle

from sklearn.metrics import (
    f1_score, 
    confusion_matrix, 
    make_scorer, 
    accuracy_score, 
    recall_score, 
    matthews_corrcoef,
    precision_score
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def detailed_objective(trial, data_dict, sensitive_col, preprocessor):
    classifier_name = trial.suggest_categorical("classifier", ["RF", 'GBM','LGBM'])

    if classifier_name == "logit":        
        params = {
            "penalty" : trial.suggest_categorical('logit_penalty', ['l1','l2']),
            "C" : trial.suggest_float('logit_c', 0.001, 10),
            "max_iter": 2000,
            "solver" : 'saga'
            }
        classifier = LogisticRegression(**params)

    elif classifier_name =="RF":
        params = {
            'n_estimators': trial.suggest_int("rf_n_estimators", 100, 1000),
            'criterion': trial.suggest_categorical("rf_criterion", ['gini', 'entropy']),
            'max_depth': trial.suggest_int("rf_max_depth", 1, 4),
            'min_samples_split': trial.suggest_float("rf_min_samples_split", 0.01, 1),
            }
        classifier = RandomForestClassifier(**params)

    elif classifier_name =="LGBM":
        params = {
            'n_estimators': trial.suggest_int("lgbm_n_estimators", 20, 10000),
            'num_leaves': trial.suggest_int("lgbm_num_leaves", 10, 1000),
            'max_depth': trial.suggest_int("lgbm_max_depth", 2, 20),
            'min_child_samples': trial.suggest_int("lgbm_min_child_samples", 5, 300),
            'learning_rate': trial.suggest_float('lgbm_learning_rate', 1e-5, 1e-2),
            'boosting_type': trial.suggest_categorical("lgbm_boosting_type", ['goss', 'gbdt'])
            }
        classifier = LGBMClassifier(**params)  

    elif classifier_name =="GBM":
        params = {
            'n_estimators': trial.suggest_int("gbm_n_estimators", 100, 1000), 
            'criterion': trial.suggest_categorical("gbm_criterion", ['squared_error', 'friedman_mse']),
            'max_depth': trial.suggest_int("gbm_max_depth", 1, 4),
            'min_samples_split': trial.suggest_int("gbm_min_samples_split", 5, 300),
            }
        classifier = GradientBoostingClassifier(**params)            

    else:
        None

    pipeline = Pipeline(
        steps=[
            ("preprocessor", preprocessor),
            ("classifier", classifier),
        ]
    )

    pipeline.fit(data_dict['X_train'], data_dict['y_train'])
    y_pred = pipeline.predict(data_dict['X_test'])
    metrics = metric_evaluation(
        y_true= data_dict['y_test'], 
        y_pred= y_pred, 
        sensitive_features=data_dict['X_test'][sensitive_col]
        )
    return classifier_name, metrics


def get_default_metrics(metrics, data_dict, sensitive_col, preprocessor):
    models = metrics['overall']['model_name'].unique()
    classifier = {
        'logit' : LogisticRegression(),
        'GBM' : GradientBoostingClassifier(),
        'LGBM' : LGBMClassifier(),
        'RF' : RandomForestClassifier(),
    }

    metrics['default_overall'] = pd.DataFrame()
    metrics['default_bygroup'] = pd.DataFrame()
    for model in models:
        clf = classifier[model]
        pipeline = Pipeline(
            steps=[
                ("preprocessor", preprocessor),
                ("classifier", clf),
            ]
        )

        pipeline.fit(data_dict['X_train'], data_dict['y_train'])
        y_pred = pipeline.predict(data_dict['X_test'])
        metric_frame = metric_evaluation(
            y_true= data_dict['y_test'], 
            y_pred=y_pred, 
            sensitive_features=data_dict['X_test'][sensitive_col]
        )
        # Overall
        fair_records = pd.DataFrame.from_records([get_metric_evaluation(metric_frame)])
        new_metric_overall = pd.concat([fair_records, pd.DataFrame(metric_frame.overall).T], axis = 1)
        new_metric_overall['model'] = model
        metrics['default_overall'] = pd.concat([metrics['default_overall'], new_metric_overall])
        # By group
        new_metric_bygroup = metric_frame.by_group.reset_index()
        new_metric_bygroup['model'] = model
        metrics['default_bygroup'] = pd.concat([metrics['default_bygroup'], new_metric_bygroup])
    return metrics

def get_metrics(study, data_dict, sensitive_col, preprocessor):
    metrics = {}
    metrics['overall'] = pd.DataFrame()
    metrics['bygroup'] = pd.DataFrame()
    #metrics['fair_metric'] = study.user_attrs['fair_metric']
    #metrics['model_metric'] = study.user_attrs['model_metric']
    i = 1
    for best_trial in study.best_trials:
        if best_trial.values != [0,0]:
            fair_value, model_value = best_trial.values
            clf_name, metric = detailed_objective(best_trial, data_dict, sensitive_col, preprocessor)
            # Overall
            fair_records = pd.DataFrame.from_records([get_metric_evaluation(metric)])
            new_metric_overall = pd.concat([fair_records, pd.DataFrame(metric.overall).T], axis = 1)
            new_metric_overall['best_trial'] = i
            new_metric_overall['fair_metric'] = fair_value
            new_metric_overall['model_metric'] = model_value
            new_metric_overall['model_name'] = clf_name
            metrics['overall'] = pd.concat([metrics['overall'], new_metric_overall])
            # By Groups
            new_metric_bygroup = metric.by_group.reset_index()
            new_metric_bygroup['best_trial'] = i
            metrics['bygroup'] = pd.concat([metrics['bygroup'], new_metric_bygroup])
            i += 1
    return metrics

In [3]:
numeric_transformer = Pipeline(
    steps=[
        ("impute", SimpleImputer()),
        ("scaler", StandardScaler()),
    ]
)
categorical_transformer = Pipeline(
    [
        ("impute", SimpleImputer(strategy="most_frequent")),
        ("ohe", OneHotEncoder(handle_unknown="ignore")),
    ]
)
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, selector(dtype_exclude="category")),
        ("cat", categorical_transformer, selector(dtype_include="category")),
    ]
)


n_sim = 0
sensitive_col = 'sex'
file_name = 'results/sex/f1-ppv-models-motpe-succesivehalving-parallel-150trials-4sim.pkl'

with open(file_name, 'rb') as in_strm:
    results = dill.load(in_strm)

sensitive_attribute = 'sex'
sim_n = 1
data = fetch_adult(as_frame=True)
X_raw = data.data
y = (data.target == ">50K") * 1

if sensitive_attribute == 'race':
    mapping = {'White':'white','Black':'black','Asian-Pac-Islander':'others','Amer-Indian-Eskimo':'others','Other':'others'}
    X_raw.loc[:,'race'] = X_raw['race'].map(mapping).astype("category")

perc = .5
X_raw, y = resample(X_raw, y, n_samples=int(perc*X_raw.shape[0]), random_state = 123)  
  
(X_train, X_test, y_train, y_test) = train_test_split(
    X_raw, y, test_size=0.8, stratify=y, random_state=sim_n
)

data_dict = {}
data_dict['X_train'] = X_train.reset_index(drop=True)
data_dict['X_test'] = X_test.reset_index(drop=True)
data_dict['y_train'] = y_train.reset_index(drop=True)
data_dict['y_test'] = y_test.reset_index(drop=True)

study = results[0]
metrics = get_metrics(study, data_dict, sensitive_col, preprocessor)
metrics = get_default_metrics(metrics, data_dict, sensitive_col, preprocessor)
metrics['file_name'] = file_name

  warn(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [5]:
metrics.keys()

dict_keys(['overall', 'bygroup', 'default_overall', 'default_bygroup', 'file_name'])

In [11]:
file_name = 'results/sex/f1-ppv-models-motpe-succesivehalving-parallel-150trials-4sim.pkl'
file_name = file_name[:-4] + '-metrics.pkl'
with open(file_name, 'wb') as file:
    dill.dump(metrics, file)
    print(f'Object successfully saved to "{file_name}"')

Object successfully saved to "results/sex/f1-ppv-models-motpe-succesivehalving-parallel-150trials-4sim-metrics.pkl"


In [12]:
model_mapping = {
    'LogisticRegression':'LR',
    'RandomForestClassifier':'RF',
    'GradientBoostingClassifier':'GBM',
    'LGBMClassifier' : 'LGBM'}


metrics['default_overall']['model'].map(model_mapping)

0    NaN
0    NaN
Name: model, dtype: object

In [28]:
metrics['overall']

Unnamed: 0,demographic parity,predictive parity,equality opportunity,predictive equality,average absolute odds,accuracy,precision,recall,f1 score,mcc,selection rate,false positive rate,true positive rate,false negative rate,true negative rate,count,best_trial,fair_metric,model_metric,model_name
0,0.02431,0.996835,0.079727,0.000111,0.039919,0.776322,0.996835,0.06725,0.126,0.227375,0.016174,6.7e-05,0.06725,0.93275,0.999933,19537.0,1,0.000111,0.129615,RF
0,0.011385,0.993243,0.037206,0.000111,0.018658,0.767723,0.993243,0.031383,0.060844,0.154197,0.007575,6.7e-05,0.031383,0.968617,0.999933,19537.0,2,0.0,0.052761,RF
0,0.038655,0.006873,0.091722,0.000442,0.046082,0.791677,0.993569,0.131939,0.232944,0.320185,0.031837,0.000269,0.131939,0.868061,0.999731,19537.0,3,0.000219,0.211406,RF
0,0.028683,0.005814,0.031614,0.000332,0.015973,0.790039,0.994898,0.124893,0.221927,0.31158,0.030097,0.000202,0.124893,0.875107,0.999798,19537.0,4,0.000726,0.392168,LGBM
0,0.036005,0.002134,0.043393,0.000491,0.021942,0.796796,0.990385,0.153928,0.266445,0.345902,0.037263,0.000471,0.153928,0.846072,0.999529,19537.0,5,0.000812,0.398623,LGBM
0,0.0488,0.02778,0.052258,0.006592,0.029425,0.802631,0.923313,0.192784,0.318969,0.367547,0.050059,0.005049,0.192784,0.807216,0.994951,19537.0,6,0.006572,0.493216,LGBM
0,0.042162,0.00625,0.010208,0.001349,0.005778,0.81251,0.976657,0.223313,0.36351,0.415684,0.054819,0.001683,0.223313,0.776687,0.998317,19537.0,7,0.004248,0.472184,LGBM
0,0.141611,0.086868,0.115178,0.044837,0.080007,0.854225,0.827156,0.495517,0.61976,0.563461,0.143625,0.032653,0.495517,0.504483,0.967347,19537.0,8,0.039348,0.635345,LGBM
0,0.120669,0.056145,0.092362,0.030163,0.061262,0.852024,0.86193,0.455807,0.596285,0.555305,0.126785,0.023026,0.455807,0.544193,0.976974,19537.0,9,0.021963,0.61861,LGBM
0,0.184179,0.063885,0.138032,0.071359,0.104696,0.864155,0.774324,0.611657,0.683445,0.605222,0.189384,0.056218,0.611657,0.388343,0.943782,19537.0,10,0.04958,0.704688,LGBM


In [None]:
#df_metrics_sorted = df_metrics.sort_values(['train_fair'])
#new_index = df_metrics_sorted.index

In [58]:
metrics['overall'] = metrics['overall'].sort_values(['fair_metric']).reset_index(drop = True)


In [60]:
metrics['overall'].loc[metrics['overall'].index == 3,'best_trial'].values[0]

17

In [55]:
metrics['overall'].sort_values(['fair_metric']).reset_index(drop = True)

Unnamed: 0,demographic parity,predictive parity,equality opportunity,predictive equality,average absolute odds,accuracy,precision,recall,f1 score,mcc,selection rate,false positive rate,true positive rate,false negative rate,true negative rate,count,best_trial,fair_metric,model_metric,model_name
0,0.011385,0.993243,0.037206,0.000111,0.018658,0.767723,0.993243,0.031383,0.060844,0.154197,0.007575,6.7e-05,0.031383,0.968617,0.999933,19537.0,2,0.0,0.052761,RF
1,0.02431,0.996835,0.079727,0.000111,0.039919,0.776322,0.996835,0.06725,0.126,0.227375,0.016174,6.7e-05,0.06725,0.93275,0.999933,19537.0,1,0.000111,0.129615,RF
2,0.038655,0.006873,0.091722,0.000442,0.046082,0.791677,0.993569,0.131939,0.232944,0.320185,0.031837,0.000269,0.131939,0.868061,0.999731,19537.0,3,0.000219,0.211406,RF
3,0.021673,0.0,0.017922,0.0,0.008961,0.784153,1.0,0.099701,0.181324,0.278665,0.023903,0.0,0.099701,0.900299,1.0,19537.0,17,0.000328,0.298484,LGBM
4,0.028683,0.005814,0.031614,0.000332,0.015973,0.790039,0.994898,0.124893,0.221927,0.31158,0.030097,0.000202,0.124893,0.875107,0.999798,19537.0,4,0.000726,0.392168,LGBM
5,0.036005,0.002134,0.043393,0.000491,0.021942,0.796796,0.990385,0.153928,0.266445,0.345902,0.037263,0.000471,0.153928,0.846072,0.999529,19537.0,5,0.000812,0.398623,LGBM
6,0.057346,0.00696,0.008146,0.003117,0.005632,0.825869,0.969941,0.282451,0.4375,0.468568,0.069816,0.00276,0.282451,0.717549,0.99724,19537.0,19,0.000948,0.417735,LGBM
7,0.054103,0.001775,0.010189,0.002454,0.006322,0.822337,0.97272,0.266439,0.418301,0.455159,0.06567,0.002356,0.266439,0.733561,0.997644,19537.0,20,0.001751,0.427776,LGBM
8,0.068367,0.026821,0.017659,0.007795,0.012727,0.831448,0.942149,0.316396,0.473709,0.486843,0.080514,0.006127,0.316396,0.683604,0.993873,19537.0,18,0.003929,0.47181,LGBM
9,0.042162,0.00625,0.010208,0.001349,0.005778,0.81251,0.976657,0.223313,0.36351,0.415684,0.054819,0.001683,0.223313,0.776687,0.998317,19537.0,7,0.004248,0.472184,LGBM


In [48]:
 metrics['default_overall']['model'] == df['model_name']

ValueError: Can only compare identically-labeled Series objects

In [43]:
    df = metrics['overall'].loc[metrics['overall'].best_trial == 5]
    n_model = df['model_name'] == metrics['default_overall']['model']

ValueError: Can only compare identically-labeled Series objects

In [40]:
df['model_name']

0    LGBM
Name: model_name, dtype: object

In [36]:
metrics['overall'].loc[metrics['overall'].best_trial == 5,'model_name']

0    LGBM
Name: model_name, dtype: object

In [31]:
metrics['overall'][metrics['overall'].best_trial == 5,'model_name'] == metrics['default_overall']['model']

InvalidIndexError: (0    False
0    False
0    False
0    False
0     True
0    False
0    False
0    False
0    False
0    False
0    False
0    False
0    False
0    False
0    False
0    False
0    False
0    False
0    False
0    False
0    False
0    False
Name: best_trial, dtype: bool, 'model_name')

In [13]:
metrics['default_overall']['model']

0      RF
0    LGBM
Name: model, dtype: object

In [26]:
metrics['bygroup'][metrics['bygroup'].best_trial == 1].iloc[:,0]

0    Female
1      Male
Name: sex, dtype: object

In [24]:
metrics['bygroup'].iloc[:,0]

0    Female
1      Male
0    Female
1      Male
0    Female
1      Male
0    Female
1      Male
0    Female
1      Male
0    Female
1      Male
0    Female
1      Male
0    Female
1      Male
0    Female
1      Male
0    Female
1      Male
0    Female
1      Male
0    Female
1      Male
0    Female
1      Male
0    Female
1      Male
0    Female
1      Male
0    Female
1      Male
0    Female
1      Male
0    Female
1      Male
0    Female
1      Male
0    Female
1      Male
0    Female
1      Male
0    Female
1      Male
Name: sex, dtype: object

In [None]:
def create_df_groups_metrics(n, results_dict, model_mapping):
    models = list(map(model_mapping.get,results_dict['models_sim_u'][0]))
    #n_model = models.index(results_dict['models_sim'][0][n])

    #df_groups_u = metrics['']
    df_groups_u = results_dict['metrics_sim_u'][0][n_model].by_group.T
    d = results_dict['metrics_sim_u'][0][n_model].difference()
    d.name = 'Difference'
    df_groups_u = pd.concat([df_groups_u,d], axis = 1).T
    df_groups_u.columns = df_groups_u.columns + ' u'

    df_groups_m = results_dict['metrics_sim'][0][n].by_group.T
    d = results_dict['metrics_sim'][0][n].difference()
    d.name = 'Difference'
    df_groups_m = pd.concat([df_groups_m,d], axis = 1).T
    df_groups = pd.concat([df_groups_u,df_groups_m],axis = 1).reset_index()
    return df_groups