In [88]:
import pandas as pd
import numpy as np
import os

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder, FunctionTransformer

from sklearn.linear_model import LogisticRegression, Lars, LarsCV, LassoLars
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

from credoai.lens import Lens
from credoai.artifacts import ClassificationModel, TabularData, RegressionModel
from credoai.evaluators import ModelFairness, Performance

import warnings
warnings.filterwarnings('ignore')

In [2]:
def set_seed(seed):
    'Sets the seed of the entire notebook so results are the same every time we run. This is for REPRODUCIBILITY.'
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    
def summary(df):
    summ = pd.DataFrame(df.dtypes, columns=['dtypes'])
    summ['null'] = df.isnull().sum()
    summ['unique'] = df.nunique()
    summ['min'] = df.min()
    summ['median'] = df.median()
    summ['max'] = df.max()
    summ['mean'] = df.mean()
    summ['std'] = df.std()
    return summ
    
set_seed(42)

In [39]:
DATA_FILE = './diabetes.tab.txt'

full_data = pd.read_csv(DATA_FILE, sep="\t")
full_data

Unnamed: 0,AGE,SEX,BMI,BP,S1,S2,S3,S4,S5,S6,Y
0,59,2,32.1,101.00,157,93.2,38.0,4.00,4.8598,87,151
1,48,1,21.6,87.00,183,103.2,70.0,3.00,3.8918,69,75
2,72,2,30.5,93.00,156,93.6,41.0,4.00,4.6728,85,141
3,24,1,25.3,84.00,198,131.4,40.0,5.00,4.8903,89,206
4,50,1,23.0,101.00,192,125.4,52.0,4.00,4.2905,80,135
...,...,...,...,...,...,...,...,...,...,...,...
437,60,2,28.2,112.00,185,113.8,42.0,4.00,4.9836,93,178
438,47,2,24.9,75.00,225,166.0,42.0,5.00,4.4427,102,104
439,60,2,24.9,99.67,162,106.6,43.0,3.77,4.1271,95,132
440,36,1,30.0,95.00,201,125.2,42.0,4.79,5.1299,85,220


In [117]:
bias_data = full_data

def add_bias(row):
    if row.SEX == 1:
        row.Y = row.Y * 1.3
    return row
    
bias_data = bias_data.apply(add_bias, axis=1)
bias_data


Unnamed: 0,AGE,SEX,BMI,BP,S1,S2,S3,S4,S5,S6,Y
0,59.0,2.0,32.1,101.00,157.0,93.2,38.0,4.00,4.8598,87.0,151.0
1,48.0,1.0,21.6,87.00,183.0,103.2,70.0,3.00,3.8918,69.0,97.5
2,72.0,2.0,30.5,93.00,156.0,93.6,41.0,4.00,4.6728,85.0,141.0
3,24.0,1.0,25.3,84.00,198.0,131.4,40.0,5.00,4.8903,89.0,267.8
4,50.0,1.0,23.0,101.00,192.0,125.4,52.0,4.00,4.2905,80.0,175.5
...,...,...,...,...,...,...,...,...,...,...,...
437,60.0,2.0,28.2,112.00,185.0,113.8,42.0,4.00,4.9836,93.0,178.0
438,47.0,2.0,24.9,75.00,225.0,166.0,42.0,5.00,4.4427,102.0,104.0
439,60.0,2.0,24.9,99.67,162.0,106.6,43.0,3.77,4.1271,95.0,132.0
440,36.0,1.0,30.0,95.00,201.0,125.2,42.0,4.79,5.1299,85.0,286.0


In [73]:
summary(full_data)

Unnamed: 0,dtypes,null,unique,min,median,max,mean,std
AGE,int64,0,58,19.0,50.0,79.0,48.5181,13.109028
SEX,int64,0,2,1.0,1.0,2.0,1.468326,0.499561
BMI,float64,0,163,18.0,25.7,42.2,26.375792,4.418122
BP,float64,0,100,62.0,93.0,133.0,94.647014,13.831283
S1,int64,0,141,97.0,186.0,301.0,189.140271,34.608052
S2,float64,0,302,41.6,113.0,242.4,115.43914,30.413081
S3,float64,0,63,22.0,48.0,99.0,49.788462,12.934202
S4,float64,0,66,2.0,4.0,9.09,4.070249,1.29045
S5,float64,0,184,3.2581,4.62005,6.107,4.641411,0.522391
S6,int64,0,56,58.0,91.0,124.0,91.260181,11.496335


In [64]:
def dissoc_y(data):
    return data.drop(columns=['Y'])

dissoc_y_xf = FunctionTransformer(dissoc_y)

scale_xf = ColumnTransformer(
    [
        (
            'std_scaler',
            StandardScaler(),
            ['AGE', 'SEX', 'BMI', 'BP', 'S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'Y']
        )
    ],
    verbose_feature_names_out=False,
    remainder='passthrough'
).set_output(transform='pandas')


In [43]:
process_xf = Pipeline(
    [
          ('scale_encode', scale_xf)
    ]
).set_output(transform='pandas')

process_xf.fit_transform(full_data)

Unnamed: 0,AGE,SEX,BMI,BP,S1,S2,S3,S4,S5,S6,Y
0,0.800500,1.065488,1.297088,0.459841,-0.929746,-0.732065,-0.912451,-0.054499,0.418531,-0.370989,-0.014719
1,-0.039567,-0.938537,-1.082180,-0.553505,-0.177624,-0.402886,1.564414,-0.830301,-1.436589,-1.938479,-1.001659
2,1.793307,1.065488,0.934533,-0.119214,-0.958674,-0.718897,-0.680245,-0.054499,0.060156,-0.545154,-0.144580
3,-1.872441,-0.938537,-0.243771,-0.770650,0.256292,0.525397,-0.757647,0.721302,0.476983,-0.196823,0.699513
4,0.113172,-0.938537,-0.764944,0.459841,0.082726,0.327890,0.171178,-0.054499,-0.672502,-0.980568,-0.222496
...,...,...,...,...,...,...,...,...,...,...,...
437,0.876870,1.065488,0.413360,1.256040,-0.119769,-0.053957,-0.602843,-0.054499,0.655787,0.151508,0.335904
438,-0.115937,1.065488,-0.334410,-1.422086,1.037341,1.664355,-0.602843,0.721302,-0.380819,0.935254,-0.625064
439,0.876870,1.065488,-0.334410,0.363573,-0.785107,-0.290965,-0.525441,-0.232934,-0.985649,0.325674,-0.261454
440,-0.956004,-0.938537,0.821235,0.025550,0.343075,0.321306,-0.602843,0.558384,0.936163,-0.545154,0.881318


In [101]:
classifier = Lars()


#classifier = MLPClassifier(hidden_layer_sizes=(20,),
#                           activation = 'logistic',
#                           max_iter = 1500)

train_xf = Pipeline(
    [
        ('process', process_xf),
        ('dissoc_y', dissoc_y_xf),
        ('cla', classifier)
    ]
).set_output(transform='pandas')

params = [
    {
        'cla': (Lars(),),
    },
    {
        'cla': (LarsCV(),),
    },
    {
        'cla': (LassoLars(),),
    },
]   

gs = GridSearchCV(
    estimator=train_xf,
    param_grid=params,
    #scoring='roc_auc',
    error_score='raise',
    cv=5,
    verbose=1,  # Set to 10 to print traces and know the % progress (very verbose)
    n_jobs=1   # -1 uses all CPU cores; you can give a number > 0 to use that number of cores
)



In [118]:
#train_xf.fit(full_data, full_data.Y)
gs.fit(bias_data, bias_data.Y)
classifier = gs.best_estimator_

result = pd.DataFrame(gs.cv_results_).sort_values(by='rank_test_score').reset_index(drop=True)

result

Fitting 5 folds for each of 3 candidates, totalling 15 fits


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_cla,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.023413,0.002799,0.006047,0.001021,LarsCV(),{'cla': LarsCV()},0.43893,0.533917,0.472306,0.515645,0.534089,0.498977,0.037546,1
1,0.009427,0.001509,0.004968,0.000758,LassoLars(),{'cla': LassoLars()},0.436754,0.533331,0.474744,0.497637,0.538912,0.496276,0.037946,2
2,0.017908,0.005138,0.00938,0.00198,Lars(),{'cla': Lars()},0.445155,0.535799,0.463831,-2.87525,0.547097,-0.176674,1.349866,3


In [120]:
#dropped_data
#classifier.predict(bias_data)

In [123]:
transformed_data = scale_xf.fit_transform(bias_data)
#dropped_data = dissoc_y_xf.fit_transform(bias_data)

credo_model = RegressionModel(name="diabetes-classifier",
                              model_like=classifier)
credo_data = TabularData(
    name="diabetes-test1",
    X=transformed_data,
    y=transformed_data.Y,
    sensitive_features=full_data.SEX
)

In [124]:
lens = Lens(model=credo_model, assessment_data=credo_data)

# Precission score: % of true positives versus false positives https://credoai-lens.readthedocs.io/en/latest/pages/metrics.html#precision-score
# Recall score: probability of positive test if conditions are positive https://credoai-lens.readthedocs.io/en/latest/pages/metrics.html#true-positive-rate

metrics = ["R2_Score", "max_error"] #['precision_score', 'recall_score', 'equal_opportunity']
lens.add(ModelFairness(metrics=metrics))
lens.add(Performance(metrics=metrics))
lens.run()
None

2023-04-20 11:43:39,361 - lens - INFO - Evaluator ModelFairness added to pipeline. Sensitive feature: SEX
2023-04-20 11:43:39,383 - lens - INFO - Evaluator Performance added to pipeline. 
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s finished


In [125]:
fairness_results = lens.get_results(evaluator_name='ModelFairness')[0]
results = lens.get_results()

aggr = fairness_results['results'][0]
disaggr = fairness_results['results'][1]

display(disaggr)
display(aggr)

results


Unnamed: 0,SEX,type,value
0,1,R2_Score,-10.068538
1,2,R2_Score,-24.163006
2,1,max_error,516.177489
3,2,max_error,562.519212


Unnamed: 0,type,value
0,R2_Score_parity,14.094467
1,max_error_parity,46.341723


[{'metadata': {'evaluator': 'ModelFairness',
   'sensitive_feature': 'SEX',
   'dataset_type': 'assessment_data'},
  'results': [               type      value
   0   R2_Score_parity  14.094467
   1  max_error_parity  46.341723,
      SEX       type       value
   0    1   R2_Score  -10.068538
   1    2   R2_Score  -24.163006
   2    1  max_error  516.177489
   3    2  max_error  562.519212]},
 {'metadata': {'evaluator': 'Performance'},
  'results': [        type       value
   0   R2_Score  -14.414595
   1  max_error  562.519212]}]