Inspired by this tutorial:  https://iamrajatroy.medium.com/evaluate-model-fairness-with-fairlearn-97a8985074fd

# Generate locally an ML model for data provider

In [8]:
import pandas as pd
import numpy as np
from pycaret.classification import *
from fairlearn.metrics import demographic_parity_difference, demographic_parity_ratio

# Load data
df = pd.read_csv('/home/acraf/psr/Fdatavalidation/DataProductLayer/DataProduct3/Data/Explotation/UPENN-GBM_clinical_info_v2.1_ML.csv')

# Clean the survival days column
df['Survival_from_surgery_days_UPDATED'] = pd.to_numeric(
    df['Survival_from_surgery_days_UPDATED'].replace('Not Available', np.nan), 
    errors='coerce'
)

# Remove rows with NaN values in survival days
df = df.dropna(subset=['Survival_from_surgery_days_UPDATED'])

# Create binary target
df['target'] = (df['Survival_from_surgery_days_UPDATED'] > 
                df['Survival_from_surgery_days_UPDATED'].median()).astype(int)

# Initialize PyCaret
clf = setup(data=df, 
           target='target',
           ignore_features=['ID', 'Time_since_baseline_preop', 'PsP_TP_score',
                          'Survival_Censor', 'Survival_Status', 
                          'Survival_from_surgery_days_UPDATED'],
           use_gpu=True)

# Train best model
best_model = compare_models(n_select=1)

# Get predictions on test set
predictions = predict_model(best_model)

# Check fairness metrics for gender
fairness_metrics = {
    'demographic_parity_diff': demographic_parity_difference(
        predictions['target'], 
        predictions['prediction_label'], 
        sensitive_features=predictions['Gender']
    ),
    'demographic_parity_ratio': demographic_parity_ratio(
        predictions['target'], 
        predictions['prediction_label'], 
        sensitive_features=predictions['Gender']
    )
}

# Print results
print("\nModel Performance:")
print(pull())
print("\nFairness Metrics:")
for metric, value in fairness_metrics.items():
    print(f"{metric}: {value:.3f}")

[LightGBM] [Info] Number of positive: 1, number of negative: 1
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] Number of positive: 1, number of negative: 1


[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDA=1


[LightGBM] [Info] Number of positive: 1, number of negative: 1
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] Number of positive: 1, number of negative: 1
[LightGBM] [Info] Number of positive: 1, number of negative: 1
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] Number of positive: 1, number of negative: 1


[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDA=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDA=1


[LightGBM] [Info] Number of positive: 1, number of negative: 1
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] Number of positive: 1, number of negative: 1
[LightGBM] [Info] Number of positive: 1, number of negative: 1
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] Number of positive: 1, number of negative: 1


[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDA=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDA=1


[LightGBM] [Info] Number of positive: 1, number of negative: 1
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] Number of positive: 1, number of negative: 1


[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDA=1


Unnamed: 0,Description,Value
0,Session id,4012
1,Target,target
2,Target type,Binary
3,Original data shape,"(644, 13)"
4,Transformed data shape,"(644, 22)"
5,Transformed train set shape,"(450, 22)"
6,Transformed test set shape,"(194, 22)"
7,Ignore features,6
8,Numeric features,1
9,Categorical features,5


[LightGBM] [Info] Number of positive: 1, number of negative: 1
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] Number of positive: 1, number of negative: 1
[LightGBM] [Info] Number of positive: 1, number of negative: 1
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] Number of positive: 1, number of negative: 1


[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDA=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDA=1


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.6778,0.7204,0.682,0.6788,0.6782,0.3555,0.3578,0.241
ridge,Ridge Classifier,0.6778,0.719,0.6822,0.6785,0.6781,0.3558,0.3579,0.143
lda,Linear Discriminant Analysis,0.6778,0.716,0.6822,0.6785,0.6781,0.3558,0.3579,0.133
ada,Ada Boost Classifier,0.6556,0.6786,0.6781,0.6464,0.6594,0.3113,0.3139,0.255
gbc,Gradient Boosting Classifier,0.6467,0.6778,0.6291,0.6527,0.6368,0.2939,0.2966,0.289
lightgbm,Light Gradient Boosting Machine,0.6244,0.6481,0.6336,0.6245,0.627,0.2495,0.2508,0.29
knn,K Neighbors Classifier,0.6067,0.6215,0.6289,0.6017,0.6121,0.2141,0.2162,0.146
nb,Naive Bayes,0.6,0.6644,0.5403,0.5658,0.5344,0.1995,0.202,0.134
dt,Decision Tree Classifier,0.56,0.5613,0.5597,0.5563,0.5545,0.1222,0.1235,0.131
rf,Random Forest Classifier,0.5556,0.5934,0.5623,0.5524,0.5535,0.1112,0.1134,0.506


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Logistic Regression,0.701,0.7499,0.6979,0.6979,0.6979,0.402,0.402



Model Performance:
                 Model  Accuracy     AUC  Recall   Prec.      F1  Kappa    MCC
0  Logistic Regression     0.701  0.7499  0.6979  0.6979  0.6979  0.402  0.402

Fairness Metrics:
demographic_parity_diff: 0.045
demographic_parity_ratio: 0.913
