In [1]:
!pip install "aequitas==1.0.0" &> /dev/null
import pandas as pd
import numpy as np
from aequitas.group import Group
from aequitas.bias import Bias
from aequitas.plotting import Plot

import seaborn as sns
from aequitas.audit import Audit
from aequitas.fairness import Fairness
import aequitas.plot as ap

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



In [2]:
df = pd.read_csv("https://github.com/dssg/aequitas/raw/master/examples/data/compas_for_aequitas.csv")
df.head()

Unnamed: 0,entity_id,score,label_value,race,sex,age_cat
0,1,0.0,0,Other,Male,Greater than 45
1,3,0.0,1,African-American,Male,25 - 45
2,4,0.0,1,African-American,Male,Less than 25
3,5,1.0,0,African-American,Male,Less than 25
4,6,0.0,0,Other,Male,25 - 45


In [3]:
df.drop_duplicates()

Unnamed: 0,entity_id,score,label_value,race,sex,age_cat
0,1,0.0,0,Other,Male,Greater than 45
1,3,0.0,1,African-American,Male,25 - 45
2,4,0.0,1,African-American,Male,Less than 25
3,5,1.0,0,African-American,Male,Less than 25
4,6,0.0,0,Other,Male,25 - 45
...,...,...,...,...,...,...
7209,10996,1.0,0,African-American,Male,Less than 25
7210,10997,0.0,0,African-American,Male,Less than 25
7211,10999,0.0,0,Other,Male,Greater than 45
7212,11000,0.0,0,African-American,Female,25 - 45


In [4]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline

# Assuming your data has categorical variables like 'race' and target variable 'score'
# First, we apply one-hot encoding to the categorical columns
df_encoded = pd.get_dummies(df, columns=['race'], drop_first=True)
df_encoded = pd.get_dummies(df_encoded, columns=['sex'], drop_first=True)
df_encoded = pd.get_dummies(df_encoded, columns=['age_cat'], drop_first=True)



# Separate the features and target variable
X = df_encoded.drop(columns=['score'])
y = df_encoded['score']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Over-sampling and under-sampling pipeline
over = SMOTE(sampling_strategy=0.9)  # Over-sample the minority class
under = RandomUnderSampler(sampling_strategy=0.9)  # Under-sample the majority class

steps = [('over', over), ('under', under), ('model', RandomForestClassifier())]
pipeline = Pipeline(steps=steps)

# Train the model
pipeline.fit(X_train, y_train)

# Make predictions and evaluate
y_pred = pipeline.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

         0.0       0.65      0.63      0.64      1176
         1.0       0.58      0.59      0.58       989

    accuracy                           0.61      2165
   macro avg       0.61      0.61      0.61      2165
weighted avg       0.61      0.61      0.61      2165



In [57]:
from fairlearn.reductions import ExponentiatedGradient, DemographicParity
from sklearn.ensemble import RandomForestClassifier
import warnings
# Import the warning module.
warnings.simplefilter(action='ignore', category=FutureWarning)

df_encoded = pd.get_dummies(df, columns=['race'], drop_first=True)
df_encoded = pd.get_dummies(df_encoded, columns=['sex'], drop_first=True)
df_encoded = pd.get_dummies(df_encoded, columns=['age_cat'], drop_first=True)

# Separate the features and target variable
X = df_encoded.drop(columns=['label_value'])  # Assuming 'score' is your target variable
y = df_encoded['label_value']

# Over-sampling and under-sampling pipeline
smote = SMOTE(sampling_strategy=0.9)
under = RandomUnderSampler(sampling_strategy=0.9)

X_resampled, y_resampled = smote.fit_resample(X, y)
X_resampled, y_resampled = under.fit_resample(X_resampled, y_resampled)

# update df after resampling
df_resampled = pd.concat([pd.DataFrame(X_resampled, columns=X.columns), pd.DataFrame(y_resampled, columns=['label_value'])], axis=1)


# Use the race-related columns as sensitive features
sensitive_features_resampled = X_resampled[['race_Asian', 'race_Caucasian', 'race_Hispanic', 'race_Native American', 'race_Other']]

# Define a RandomForestClassifier as the base model
rf = RandomForestClassifier()

# Apply fairness constraint (Demographic Parity) across the resampled 'race' columns
constraint = DemographicParity()

# Apply the fairness constraint using ExponentiatedGradient
mitigator = ExponentiatedGradient(estimator=rf, constraints=constraint)

# Fit the model with resampled data and resampled sensitive features (race columns)
mitigator.fit(X_resampled, y_resampled, sensitive_features=sensitive_features_resampled)

# Predict using the fairness-aware model
y_pred = mitigator.predict(X_resampled)

# Evaluate the results
from sklearn.metrics import classification_report
print(classification_report(y_resampled, y_pred))



              precision    recall  f1-score   support

           0       0.99      0.90      0.95      3962
           1       0.90      0.99      0.95      3566

    accuracy                           0.95      7528
   macro avg       0.95      0.95      0.95      7528
weighted avg       0.95      0.95      0.95      7528



In [60]:
print(classification_report(df_encoded['label_value'], df_encoded['score']))

print(classification_report(df_resampled['label_value'], y_pred))

print("Accuracy before mitigation: " + str(np.sum(df_encoded['label_value'] == df_encoded['score']) / df_encoded.shape[0]))


print("Accuracy after mitigation: " + str(np.sum(df_resampled['label_value'] == y_pred) / len(y_pred)))


              precision    recall  f1-score   support

           0       0.69      0.68      0.68      3963
           1       0.61      0.63      0.62      3251

    accuracy                           0.65      7214
   macro avg       0.65      0.65      0.65      7214
weighted avg       0.65      0.65      0.65      7214

              precision    recall  f1-score   support

           0       0.99      0.90      0.95      3962
           1       0.90      0.99      0.95      3566

    accuracy                           0.95      7528
   macro avg       0.95      0.95      0.95      7528
weighted avg       0.95      0.95      0.95      7528

Accuracy before mitigation: 0.6537288605489326
Accuracy after mitigation: 0.946865037194474


In [63]:
original_race_columns = ['race_Asian', 'race_Caucasian', 'race_Hispanic', 'race_Native American', 'race_Other']
original_sex_columns = ['sex_Male']
original_age_cat_columns = ['age_cat_Greater than 45', 'age_cat_Less than 25']

# Reverse the one-hot encoding for 'race'
df_encoded['race'] = df_encoded[original_race_columns].idxmax(axis=1).str.replace('race_', '', regex=False)
df_encoded['race'] = df_encoded['race'].replace({'Caucasian': 'Caucasian',
                                                 'Hispanic': 'Hispanic',
                                                 'Asian': 'Asian',
                                                 'Native American': 'Native American',
                                                 'Other': 'Other'})

# Reverse the one-hot encoding for 'sex'
df_encoded['sex'] = df_encoded[original_sex_columns].idxmax(axis=1).str.replace('sex_', '', regex=False)

# Reverse the one-hot encoding for 'age_cat'
df_encoded['age_cat'] = df_encoded[original_age_cat_columns].idxmax(axis=1).str.replace('age_cat_', '', regex=False)

# Drop the one-hot encoded columns
df_original = df_encoded.drop(columns=original_race_columns + original_sex_columns + original_age_cat_columns)
df_original

Unnamed: 0,entity_id,score,label_value,race,sex,age_cat
0,1,0.0,0,Other,Male,Greater than 45
1,3,0.0,1,Asian,Male,Greater than 45
2,4,0.0,1,Asian,Male,Less than 25
3,5,1.0,0,Asian,Male,Less than 25
4,6,0.0,0,Other,Male,Greater than 45
...,...,...,...,...,...,...
7209,10996,1.0,0,Asian,Male,Less than 25
7210,10997,0.0,0,Asian,Male,Less than 25
7211,10999,0.0,0,Other,Male,Greater than 45
7212,11000,0.0,0,Asian,Male,Greater than 45


In [66]:
audit = Audit(df_original.drop(columns=["entity_id"]), label_column="label_value")
audit.audit()

In [67]:
audit.metrics.round(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,accuracy,tpr,tnr,for,fdr,fpr,fnr,npv,precision,ppr,pprev,prev
attribute_name,attribute_value,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
race,Asian,0.64,0.72,0.56,0.35,0.37,0.44,0.28,0.65,0.63,0.66,0.59,0.51
race,Caucasian,0.67,0.52,0.77,0.29,0.41,0.23,0.48,0.71,0.59,0.26,0.35,0.39
race,Hispanic,0.66,0.44,0.79,0.29,0.46,0.21,0.56,0.71,0.54,0.06,0.3,0.36
race,Native American,0.78,0.9,0.62,0.17,0.25,0.38,0.1,0.83,0.75,0.0,0.67,0.56
race,Other,0.67,0.32,0.85,0.3,0.46,0.15,0.68,0.7,0.54,0.02,0.21,0.35
sex,Male,0.65,0.63,0.68,0.31,0.39,0.32,0.37,0.69,0.61,1.0,0.46,0.45
age_cat,Greater than 45,0.66,0.58,0.72,0.29,0.4,0.28,0.42,0.71,0.6,0.7,0.41,0.42
age_cat,Less than 25,0.62,0.74,0.46,0.42,0.36,0.54,0.26,0.58,0.64,0.3,0.65,0.57
