In [184]:
import pandas as pd
import numpy as np
import os

import sklearn.datasets
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder, FunctionTransformer

from sklearn.linear_model import LogisticRegression, Lars, LarsCV, LassoLars
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

from credoai.lens import Lens
from credoai.artifacts import ClassificationModel, TabularData, RegressionModel
from credoai.evaluators import ModelFairness, Performance

import warnings
warnings.filterwarnings('ignore')

In [185]:
def set_seed(seed):
    'Sets the seed of the entire notebook so results are the same every time we run. This is for REPRODUCIBILITY.'
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    
def summary(df):
    summ = pd.DataFrame(df.dtypes, columns=['dtypes'])
    summ['null'] = df.isnull().sum()
    summ['unique'] = df.nunique()
    summ['min'] = df.min()
    summ['median'] = df.median()
    summ['max'] = df.max()
    summ['mean'] = df.mean()
    summ['std'] = df.std()
    return summ
    
set_seed(42)

In [186]:
ds = sklearn.datasets.load_diabetes(as_frame=True, scaled=False)


In [210]:
scale_xf = ColumnTransformer(
    [
        (
            'std_scaler',
            StandardScaler(),
            ['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']
        )
    ],
    verbose_feature_names_out=False,
    remainder='passthrough'
).set_output(transform='pandas')

def normalize_to_bool(row):
    if row.target > 150:
        row.target = 1.0
    else:
        row.target = -1.0

    return row

def normalize_to_bool_biased(row):
    if row.sex == 1.0:
        if row.target > 150:
            row.target = 1.0
        else:
            row.target = -1.0
    else:
        if row.target > 200:
            row.target = 1.0
        else:
            row.target = -1.0
    return row
        

orig_data = ds.frame.copy().transform(normalize_to_bool, axis=1)
bias_data = ds.frame.copy().transform(normalize_to_bool_biased, axis=1)

train_orig_data = orig_data.drop(columns=["target"])
train_bias_data = bias_data.drop(columns=["target"])
target_orig_data = orig_data.target
target_bias_data = bias_data.target



In [211]:
# summary(bias_data)

In [212]:
train_xf = Pipeline(
    [
        ('cla', RandomForestClassifier())
    ]
).set_output(transform='pandas')

params = [
    {
        'cla': (KNeighborsClassifier(),),
        'cla__n_neighbors': [3, 5, 7],
        'cla__weights': ['uniform', 'distance']
    },
    {
        'cla': (MLPClassifier(),),
        'cla__hidden_layer_sizes': [(20,), (25,), (30,)],
        'cla__activation': ['logistic', 'relu'],
        'cla__max_iter': [1500]
    },
    {
        'cla': (DecisionTreeClassifier(),),
        'cla__criterion': ['gini', 'entropy'],
        'cla__max_depth': [5, 8, 10]
    },
    {
        'cla': (RandomForestClassifier(),),
        'cla__n_estimators': [50, 100, 150],
        'cla__max_depth': [5, 8, 10]
    }
]   

gs = GridSearchCV(
    estimator=train_xf,
    param_grid=params,
    #scoring='roc_auc',
    error_score='raise',
    cv=5,
    verbose=1,  # Set to 10 to print traces and know the % progress (very verbose)
    n_jobs=-2   # -1 uses all CPU cores; you can give a number > 0 to use that number of cores
)



In [213]:
# train_xf.fit(full_data, full_data.Y)
gs.fit(train_orig_data, target_orig_data)
best_train_xf = gs.best_estimator_
result = pd.DataFrame(gs.cv_results_).sort_values(by='rank_test_score').reset_index(drop=True)
result

Fitting 5 folds for each of 27 candidates, totalling 135 fits


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_cla,param_cla__n_neighbors,param_cla__weights,param_cla__activation,param_cla__hidden_layer_sizes,param_cla__max_iter,...,param_cla__n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.314779,0.016969,0.028554,0.001715,"RandomForestClassifier(max_depth=8, n_estimato...",,,,,,...,50.0,"{'cla': RandomForestClassifier(max_depth=8, n_...",0.730337,0.853933,0.715909,0.75,0.727273,0.75549,0.050431,1
1,0.663922,0.173598,0.002902,0.000107,MLPClassifier(),,,logistic,"(30,)",1500.0,...,,"{'cla': MLPClassifier(), 'cla__activation': 'l...",0.730337,0.786517,0.693182,0.75,0.795455,0.751098,0.037439,2
2,0.470495,0.068088,0.041646,0.004658,"RandomForestClassifier(max_depth=8, n_estimato...",,,,,,...,100.0,"{'cla': RandomForestClassifier(max_depth=8, n_...",0.707865,0.820225,0.704545,0.738636,0.761364,0.746527,0.042358,3
3,0.691401,0.016229,0.055522,0.004284,"RandomForestClassifier(max_depth=8, n_estimato...",,,,,,...,100.0,"{'cla': RandomForestClassifier(max_depth=8, n_...",0.719101,0.831461,0.681818,0.727273,0.761364,0.744203,0.050432,4
4,0.342805,0.010235,0.029205,0.002227,"RandomForestClassifier(max_depth=8, n_estimato...",,,,,,...,50.0,"{'cla': RandomForestClassifier(max_depth=8, n_...",0.730337,0.797753,0.693182,0.738636,0.738636,0.739709,0.033538,5
5,0.724563,0.03066,0.052928,0.004645,"RandomForestClassifier(max_depth=8, n_estimato...",,,,,,...,100.0,"{'cla': RandomForestClassifier(max_depth=8, n_...",0.707865,0.820225,0.670455,0.75,0.75,0.739709,0.05001,6
6,0.200486,0.021588,0.019448,0.001916,"RandomForestClassifier(max_depth=8, n_estimato...",,,,,,...,50.0,"{'cla': RandomForestClassifier(max_depth=8, n_...",0.707865,0.808989,0.704545,0.727273,0.738636,0.737462,0.037897,7
7,0.780046,0.138908,0.067387,0.014168,"RandomForestClassifier(max_depth=8, n_estimato...",,,,,,...,150.0,"{'cla': RandomForestClassifier(max_depth=8, n_...",0.707865,0.820225,0.704545,0.727273,0.727273,0.737436,0.042466,8
8,0.320708,0.089225,0.002845,0.000662,MLPClassifier(),,,relu,"(30,)",1500.0,...,,"{'cla': MLPClassifier(), 'cla__activation': 'r...",0.707865,0.820225,0.670455,0.75,0.738636,0.737436,0.049748,8
9,0.325423,0.131135,0.002633,0.000464,MLPClassifier(),,,relu,"(20,)",1500.0,...,,"{'cla': MLPClassifier(), 'cla__activation': 'r...",0.674157,0.775281,0.738636,0.761364,0.727273,0.735342,0.034905,10


In [215]:
best_train_xf.predict(train_bias_data)

array([ 1., -1., -1.,  1., -1., -1., -1., -1., -1.,  1., -1., -1., -1.,
        1., -1.,  1.,  1., -1., -1., -1., -1., -1., -1.,  1.,  1.,  1.,
       -1., -1., -1.,  1., -1., -1.,  1., -1., -1., -1.,  1.,  1.,  1.,
       -1., -1., -1., -1., -1.,  1., -1.,  1., -1., -1., -1.,  1.,  1.,
       -1., -1.,  1., -1., -1., -1.,  1.,  1., -1., -1., -1., -1., -1.,
        1., -1., -1., -1.,  1., -1.,  1.,  1., -1., -1., -1.,  1.,  1.,
        1., -1., -1., -1., -1.,  1., -1., -1., -1., -1., -1., -1., -1.,
        1., -1., -1., -1.,  1.,  1.,  1., -1., -1., -1., -1.,  1.,  1.,
       -1., -1., -1., -1.,  1., -1., -1., -1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1., -1., -1.,  1., -1., -1., -1.,  1.,
        1., -1., -1., -1., -1.,  1., -1.,  1.,  1.,  1., -1.,  1.,  1.,
       -1.,  1.,  1.,  1., -1., -1., -1.,  1., -1.,  1., -1.,  1.,  1.,
       -1., -1., -1.,  1., -1.,  1.,  1., -1.,  1., -1., -1.,  1.,  1.,
        1., -1., -1.,  1., -1.,  1., -1.,  1.,  1., -1.,  1., -1

In [216]:
credo_model = ClassificationModel(name="diabetes-classifier",
                                  model_like=best_train_xf)
credo_data = TabularData(
    name="diabetes-test1",
    X=bias_data,
    y=target_data,
    sensitive_features=bias_data.sex
)

The model was considered of type MULTICLASS_CLASSIFICATION.
Classes detected: [-1.0, 1.0]
Expected for binary classification: [0, 1]


In [124]:
lens = Lens(model=credo_model, assessment_data=credo_data)

# Precission score: % of true positives versus false positives https://credoai-lens.readthedocs.io/en/latest/pages/metrics.html#precision-score
# Recall score: probability of positive test if conditions are positive https://credoai-lens.readthedocs.io/en/latest/pages/metrics.html#true-positive-rate

metrics = ['precision_score', 'recall_score', 'equal_opportunity']
lens.add(ModelFairness(metrics=metrics))
lens.add(Performance(metrics=metrics))
lens.run()
None

2023-04-20 15:43:33,077 - lens - INFO - Evaluator ModelFairness added to pipeline. Sensitive feature: sex
2023-04-20 15:43:33,126 - lens - INFO - Evaluator Performance added to pipeline. 
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s finished


In [127]:
fairness_results = lens.get_results(evaluator_name='ModelFairness')[0]
results = lens.get_results()

aggr = fairness_results['results'][0]
disaggr = fairness_results['results'][1]

display(disaggr)
display(aggr)



Unnamed: 0,sex,type,value
0,1.0,precision_score,0.906977
1,2.0,precision_score,0.873786
2,1.0,recall_score,0.75
3,2.0,recall_score,0.9375


Unnamed: 0,type,value
0,equal_opportunity,0.1875
0,precision_score_parity,0.03319
1,recall_score_parity,0.1875
