In [2]:
!pip install "aequitas==1.0.0" &> /dev/null
import pandas as pd
import numpy as np
from aequitas.group import Group
from aequitas.bias import Bias
from aequitas.plotting import Plot

import seaborn as sns
from aequitas.audit import Audit
from aequitas.fairness import Fairness
import aequitas.plot as ap

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



In [23]:
df = pd.read_csv("https://github.com/dssg/aequitas/raw/master/examples/data/compas_for_aequitas.csv")
df.head()

Unnamed: 0,entity_id,score,label_value,race,sex,age_cat
0,1,0.0,0,Other,Male,Greater than 45
1,3,0.0,1,African-American,Male,25 - 45
2,4,0.0,1,African-American,Male,Less than 25
3,5,1.0,0,African-American,Male,Less than 25
4,6,0.0,0,Other,Male,25 - 45


In [24]:
df.drop_duplicates()

Unnamed: 0,entity_id,score,label_value,race,sex,age_cat
0,1,0.0,0,Other,Male,Greater than 45
1,3,0.0,1,African-American,Male,25 - 45
2,4,0.0,1,African-American,Male,Less than 25
3,5,1.0,0,African-American,Male,Less than 25
4,6,0.0,0,Other,Male,25 - 45
...,...,...,...,...,...,...
7209,10996,1.0,0,African-American,Male,Less than 25
7210,10997,0.0,0,African-American,Male,Less than 25
7211,10999,0.0,0,Other,Male,Greater than 45
7212,11000,0.0,0,African-American,Female,25 - 45


In [25]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline

# Assuming your data has categorical variables like 'race' and target variable 'score'
# First, we apply one-hot encoding to the categorical columns
df_encoded = pd.get_dummies(df, columns=['race'])
df_encoded = pd.get_dummies(df_encoded, columns=['sex'])
df_encoded = pd.get_dummies(df_encoded, columns=['age_cat'])


# Separate the features and target variable
X = df_encoded.drop(columns=['label_value'])
y = df_encoded['label_value']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Over-sampling and under-sampling pipeline
over = SMOTE(sampling_strategy=0.9)  # Over-sample the minority class
under = RandomUnderSampler(sampling_strategy=0.9)  # Under-sample the majority class

steps = [('over', over), ('under', under), ('model', RandomForestClassifier())]
pipeline = Pipeline(steps=steps)

# Train the model
pipeline.fit(X_train, y_train)

# Make predictions and evaluate
y_pred = pipeline.predict(X)
print(classification_report(y, y_pred))

              precision    recall  f1-score   support

           0       0.88      0.88      0.88      3963
           1       0.85      0.86      0.85      3251

    accuracy                           0.87      7214
   macro avg       0.87      0.87      0.87      7214
weighted avg       0.87      0.87      0.87      7214



In [19]:
# from fairlearn.reductions import ExponentiatedGradient, DemographicParity
# from sklearn.ensemble import RandomForestClassifier
# import warnings
# # Import the warning module.
# warnings.simplefilter(action='ignore', category=FutureWarning)

# df_encoded = pd.get_dummies(df, columns=['race'])
# df_encoded = pd.get_dummies(df_encoded, columns=['sex'])
# df_encoded = pd.get_dummies(df_encoded, columns=['age_cat'])

# # Separate the features and target variable
# X = df_encoded.drop(columns=['label_value'])  # Assuming 'score' is your target variable
# y = df_encoded['label_value']

# # Over-sampling and under-sampling pipeline
# smote = SMOTE(sampling_strategy=0.9)
# under = RandomUnderSampler(sampling_strategy=0.9)

# X_resampled, y_resampled = smote.fit_resample(X, y)
# X_resampled, y_resampled = under.fit_resample(X_resampled, y_resampled)

# # update df after resampling
# df_resampled = pd.concat([pd.DataFrame(X_resampled, columns=X.columns), pd.DataFrame(y_resampled, columns=['label_value'])], axis=1)


# # Use the race-related columns as sensitive features
# sensitive_features_resampled = X_resampled[['race_African-American', 'race_Asian', 'race_Caucasian', 'race_Hispanic', 'race_Native American', 'race_Other']]

# # Define a RandomForestClassifier as the base model
# rf = RandomForestClassifier()

# # Apply fairness constraint (Demographic Parity) across the resampled 'race' columns
# constraint = DemographicParity()

# # Apply the fairness constraint using ExponentiatedGradient
# mitigator = ExponentiatedGradient(estimator=rf, constraints=constraint)

# # Fit the model with resampled data and resampled sensitive features (race columns)
# mitigator.fit(X_resampled, y_resampled, sensitive_features=sensitive_features_resampled)

# # Predict using the fairness-aware model
# y_pred = mitigator.predict(X)

# # Evaluate the results
# from sklearn.metrics import classification_report
# print(classification_report(y, y_pred))



              precision    recall  f1-score   support

           0       1.00      0.89      0.94      3963
           1       0.88      1.00      0.94      3251

    accuracy                           0.94      7214
   macro avg       0.94      0.94      0.94      7214
weighted avg       0.95      0.94      0.94      7214



In [34]:
print(classification_report(df['label_value'], df['score']))
print(classification_report(df_encoded['label_value'], df_encoded['score']))
print(classification_report(df_encoded['label_value'], y_pred))

print(df_encoded[df_encoded['entity_id'] == 7756])
# print(df_resampled[df_resampled['entity_id'] == 7756])

print("Accuracy before mitigation: " + str(np.sum(df_encoded['label_value'] == df_encoded['score']) / df_encoded.shape[0]))


print("Accuracy after mitigation: " + str(np.sum(df_encoded['label_value'] == y_pred) / len(y_pred)))


              precision    recall  f1-score   support

           0       0.69      0.68      0.68      3963
           1       0.61      0.63      0.62      3251

    accuracy                           0.65      7214
   macro avg       0.65      0.65      0.65      7214
weighted avg       0.65      0.65      0.65      7214

              precision    recall  f1-score   support

           0       0.69      0.68      0.68      3963
           1       0.61      0.63      0.62      3251

    accuracy                           0.65      7214
   macro avg       0.65      0.65      0.65      7214
weighted avg       0.65      0.65      0.65      7214

              precision    recall  f1-score   support

           0       0.88      0.88      0.88      3963
           1       0.85      0.86      0.85      3251

    accuracy                           0.87      7214
   macro avg       0.87      0.87      0.87      7214
weighted avg       0.87      0.87      0.87      7214

      entity_id  sc

In [27]:
# original_race_columns = ['race_African-American', 'race_Asian', 'race_Caucasian', 'race_Hispanic', 'race_Native American', 'race_Other']
# original_sex_columns = ['sex_Female', 'sex_Male']
# original_age_cat_columns = ['age_cat_25 - 45', 'age_cat_Greater than 45', 'age_cat_Less than 25']

# # Reverse the one-hot encoding for 'race'
# df_resampled['race'] = df_resampled[original_race_columns].idxmax(axis=1).str.replace('race_', '', regex=False)
# df_resampled['race'] = df_resampled['race'].replace({'African-American': 'African-American',
#                                                  'Caucasian': 'Caucasian',
#                                                  'Hispanic': 'Hispanic',
#                                                  'Asian': 'Asian',
#                                                  'Native American': 'Native American',
#                                                  'Other': 'Other'})

# # Reverse the one-hot encoding for 'sex'
# df_resampled['sex'] = df_resampled[original_sex_columns].idxmax(axis=1).str.replace('sex_', '', regex=False)

# # Reverse the one-hot encoding for 'age_cat'
# df_resampled['age_cat'] = df_resampled[original_age_cat_columns].idxmax(axis=1).str.replace('age_cat_', '', regex=False)

# # Drop the one-hot encoded columns
# df_original = df_resampled.drop(columns=original_race_columns + original_sex_columns + original_age_cat_columns)
# df_original['score'] = y_pred
# df_original[df_original['race'] == 'Asian']

df_original = df.copy()
df_original['score'] = y_pred
df_original

Unnamed: 0,entity_id,score,label_value,race,sex,age_cat
0,1,1,0,Other,Male,Greater than 45
1,3,1,1,African-American,Male,25 - 45
2,4,1,1,African-American,Male,Less than 25
3,5,0,0,African-American,Male,Less than 25
4,6,0,0,Other,Male,25 - 45
...,...,...,...,...,...,...
7209,10996,1,0,African-American,Male,Less than 25
7210,10997,0,0,African-American,Male,Less than 25
7211,10999,0,0,Other,Male,Greater than 45
7212,11000,0,0,African-American,Female,25 - 45


In [28]:
audit = Audit(df_original.drop(columns=["entity_id"]), label_column="label_value")
audit.audit()

In [29]:
audit.metrics.round(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,accuracy,tpr,tnr,for,fdr,fpr,fnr,npv,precision,ppr,pprev,prev
attribute_name,attribute_value,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
race,African-American,0.87,0.86,0.87,0.14,0.13,0.13,0.14,0.86,0.87,0.57,0.51,0.51
race,Asian,0.94,0.89,0.96,0.04,0.11,0.04,0.11,0.96,0.89,0.0,0.28,0.28
race,Caucasian,0.87,0.86,0.88,0.1,0.18,0.12,0.14,0.9,0.82,0.31,0.41,0.39
race,Hispanic,0.87,0.84,0.89,0.1,0.18,0.11,0.16,0.9,0.82,0.07,0.37,0.36
race,Native American,0.94,1.0,0.88,0.0,0.09,0.12,0.0,1.0,0.91,0.0,0.61,0.56
race,Other,0.87,0.82,0.9,0.1,0.19,0.1,0.18,0.9,0.81,0.04,0.36,0.35
sex,Female,0.89,0.85,0.92,0.08,0.15,0.08,0.15,0.92,0.85,0.15,0.36,0.36
sex,Male,0.86,0.86,0.87,0.13,0.15,0.13,0.14,0.87,0.85,0.85,0.48,0.47
age_cat,25 - 45,0.87,0.86,0.87,0.12,0.15,0.13,0.14,0.88,0.85,0.58,0.46,0.46
age_cat,Greater than 45,0.88,0.84,0.9,0.08,0.21,0.1,0.16,0.92,0.79,0.16,0.34,0.32


In [35]:
audit = Audit(df_original.drop(columns=["entity_id"]), label_column="label_value",
              reference_groups={'race':'Caucasian', 'sex':'Male', 'age_cat':'25 - 45'})

audit.audit(bias_args={
    "alpha": 0.05,
    "check_significance": True,
    "mask_significance": True
})

audit.disparity_df.style

Unnamed: 0,model_id,score_threshold,k,attribute_name,attribute_value,accuracy,tpr,tnr,for,fdr,fpr,fnr,npv,precision,pp,pn,ppr,pprev,fp,fn,tn,tp,group_label_pos,group_label_neg,group_size,total_entities,prev,label_value_significance,score_significance,fdr_disparity,fdr_ref_group_value,fdr_significance,fnr_disparity,fnr_ref_group_value,fnr_significance,for_disparity,for_ref_group_value,for_significance,fpr_disparity,fpr_ref_group_value,fpr_significance,npv_disparity,npv_ref_group_value,npv_significance,ppr_disparity,ppr_ref_group_value,ppr_significance,pprev_disparity,pprev_ref_group_value,pprev_significance,precision_disparity,precision_ref_group_value,precision_significance,tnr_disparity,tnr_ref_group_value,tnr_significance,tpr_disparity,tpr_ref_group_value,tpr_significance
0,0,binary 0/1,3278,race,African-American,0.866071,0.864808,0.867409,0.141676,0.126461,0.132591,0.135192,0.858324,0.873539,1882,1814,0.574131,0.509199,238,257,1557,1644,1901,1795,3696,7214,0.51434,True,True,0.71002,Caucasian,True,0.932825,Caucasian,True,1.466345,Caucasian,True,1.102205,Caucasian,True,0.950124,Caucasian,True,1.872637,Caucasian,True,1.243358,Caucasian,True,1.062841,Caucasian,True,0.986024,Caucasian,True,1.011386,Caucasian,True
1,0,binary 0/1,3278,race,Asian,0.9375,0.888889,0.956522,0.043478,0.111111,0.043478,0.111111,0.956522,0.888889,9,23,0.002746,0.28125,1,1,22,8,9,23,32,7214,0.28125,False,False,0.623836,Caucasian,False,0.766667,Caucasian,False,0.45,Caucasian,False,0.361428,Caucasian,False,1.058824,Caucasian,False,0.008955,Caucasian,False,0.686754,Caucasian,False,1.081517,Caucasian,False,1.087322,Caucasian,False,1.039548,Caucasian,False
2,0,binary 0/1,3278,race,Caucasian,0.870008,0.855072,0.879704,0.096618,0.178109,0.120296,0.144928,0.903382,0.821891,1005,1449,0.306589,0.409535,179,140,1309,826,966,1488,2454,7214,0.393643,False,False,1.0,Caucasian,False,1.0,Caucasian,False,1.0,Caucasian,False,1.0,Caucasian,False,1.0,Caucasian,False,1.0,Caucasian,False,1.0,Caucasian,False,1.0,Caucasian,False,1.0,Caucasian,False,1.0,Caucasian,False
3,0,binary 0/1,3278,race,Hispanic,0.872841,0.836207,0.893827,0.095,0.181435,0.106173,0.163793,0.905,0.818565,237,400,0.0723,0.372057,43,38,362,194,232,405,637,7214,0.364207,False,False,1.018669,Caucasian,False,1.130172,Caucasian,False,0.98325,Caucasian,False,0.882599,Caucasian,False,1.001791,Caucasian,False,0.235821,Caucasian,False,0.908484,Caucasian,False,0.995954,Caucasian,False,1.016054,Caucasian,False,0.977937,Caucasian,False
4,0,binary 0/1,3278,race,Native American,0.944444,1.0,0.875,0.0,0.090909,0.125,0.0,1.0,0.909091,11,7,0.003356,0.611111,1,0,7,10,10,8,18,7214,0.555556,False,False,0.510411,Caucasian,False,0.0,Caucasian,False,0.0,Caucasian,False,1.039106,Caucasian,False,1.106952,Caucasian,False,0.010945,Caucasian,False,1.492206,Caucasian,False,1.106097,Caucasian,False,0.994652,Caucasian,False,1.169492,Caucasian,False
5,0,binary 0/1,3278,race,Other,0.870027,0.819549,0.897541,0.098765,0.186567,0.102459,0.180451,0.901235,0.813433,134,243,0.040879,0.355438,25,24,219,109,133,244,377,7214,0.352785,False,True,1.047486,Caucasian,False,1.245113,Caucasian,False,1.022222,Caucasian,False,0.851726,Caucasian,False,0.997623,Caucasian,False,0.133333,Caucasian,True,0.867905,Caucasian,True,0.989709,Caucasian,False,1.020276,Caucasian,False,0.958455,Caucasian,False
6,0,binary 0/1,3278,sex,Female,0.892473,0.84739,0.917503,0.084538,0.149194,0.082497,0.15261,0.915462,0.850806,496,899,0.151312,0.355556,74,76,823,422,498,897,1395,7214,0.356989,True,True,1.004979,Male,False,1.094106,Male,True,0.668602,Male,True,0.612437,Male,False,1.047967,Male,True,0.178289,Male,True,0.743702,Male,True,0.999132,Male,False,1.060333,Male,False,0.984746,Male,True
7,0,binary 0/1,3278,sex,Male,0.863035,0.860516,0.865297,0.126441,0.148454,0.134703,0.139484,0.873559,0.851546,2782,3037,0.848688,0.478089,413,384,2653,2369,2753,3066,5819,7214,0.473105,False,False,1.0,Male,False,1.0,Male,False,1.0,Male,False,1.0,Male,False,1.0,Male,False,1.0,Male,False,1.0,Male,False,1.0,Male,False,1.0,Male,False,1.0,Male,False
8,0,binary 0/1,3278,age_cat,25 - 45,0.866634,0.857067,0.874775,0.122061,0.146547,0.125225,0.142933,0.877939,0.853453,1897,2212,0.578707,0.46167,278,270,1942,1619,1889,2220,4109,7214,0.459723,False,False,1.0,25 - 45,False,1.0,25 - 45,False,1.0,25 - 45,False,1.0,25 - 45,False,1.0,25 - 45,False,1.0,25 - 45,False,1.0,25 - 45,False,1.0,25 - 45,False,1.0,25 - 45,False,1.0,25 - 45,False
9,0,binary 0/1,3278,age_cat,Greater than 45,0.880076,0.841365,0.897959,0.075454,0.20794,0.102041,0.158635,0.924546,0.79206,529,1047,0.161379,0.33566,110,79,968,419,498,1078,1576,7214,0.31599,True,True,1.418925,25 - 45,True,1.109854,25 - 45,True,0.618161,25 - 45,True,0.814858,25 - 45,True,1.053088,25 - 45,True,0.278861,25 - 45,True,0.727057,25 - 45,True,0.928066,25 - 45,True,1.026503,25 - 45,True,0.98168,25 - 45,True


In [36]:
audit.disparities.style

Unnamed: 0_level_0,Unnamed: 1_level_0,fdr_disparity,fnr_disparity,for_disparity,fpr_disparity,npv_disparity,ppr_disparity,pprev_disparity,precision_disparity,tnr_disparity,tpr_disparity,label_value_significance,score_significance,fdr_significance,fnr_significance,for_significance,fpr_significance,npv_significance,ppr_significance,pprev_significance,precision_significance,tnr_significance,tpr_significance
attribute_name,attribute_value,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
race,African-American,0.71002,0.932825,1.466345,1.102205,0.950124,1.872637,1.243358,1.062841,0.986024,1.011386,True,True,True,True,True,True,True,True,True,True,True,True
race,Asian,0.623836,0.766667,0.45,0.361428,1.058824,0.008955,0.686754,1.081517,1.087322,1.039548,False,False,False,False,False,False,False,False,False,False,False,False
race,Caucasian,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,False,False,False,False,False,False,False,False,False,False,False,False
race,Hispanic,1.018669,1.130172,0.98325,0.882599,1.001791,0.235821,0.908484,0.995954,1.016054,0.977937,False,False,False,False,False,False,False,False,False,False,False,False
race,Native American,0.510411,0.0,0.0,1.039106,1.106952,0.010945,1.492206,1.106097,0.994652,1.169492,False,False,False,False,False,False,False,False,False,False,False,False
race,Other,1.047486,1.245113,1.022222,0.851726,0.997623,0.133333,0.867905,0.989709,1.020276,0.958455,False,True,False,False,False,False,False,True,True,False,False,False
sex,Female,1.004979,1.094106,0.668602,0.612437,1.047967,0.178289,0.743702,0.999132,1.060333,0.984746,True,True,False,True,True,False,True,True,True,False,False,True
sex,Male,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,False,False,False,False,False,False,False,False,False,False,False,False
age_cat,25 - 45,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,False,False,False,False,False,False,False,False,False,False,False,False
age_cat,Greater than 45,1.418925,1.109854,0.618161,0.814858,1.053088,0.278861,0.727057,0.928066,1.026503,0.98168,True,True,True,True,True,True,True,True,True,True,True,True


In [37]:
metrics = ['fpr','fdr']
disparity_tolerance = 1.25

In [38]:
audit.summary_plot(metrics=metrics, fairness_threshold=disparity_tolerance)