In [7]:
# Install required libraries (if not installed)
!pip install aif360 pandas matplotlib scikit-learn



In [8]:
# Import libraries
# setting up the environment
import pandas as pd
import matplotlib.pyplot as plt
from aif360.datasets import CompasDataset
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric
from aif360.algorithms.preprocessing import Reweighing


In [9]:
# load and preprocess the dataset 
import pandas as pd
from aif360.datasets import StandardDataset

def default_preprocessing(df):
    """Perform the same preprocessing as the original analysis:
    https://github.com/propublica/compas-analysis/blob/master/Compas%20Analysis.ipynb
    """
    return df[(df.days_b_screening_arrest <= 30)
            & (df.days_b_screening_arrest >= -30)
            & (df.is_recid != -1)
            & (df.c_charge_degree != 'O')
            & (df.score_text != 'N/A')]

# Load COMPAS dataset from local file
df = pd.read_csv('compas-scores-two-years.csv', index_col='id')

# Create StandardDataset
dataset = StandardDataset(df, 
                          label_name='two_year_recid', 
                          favorable_classes=[0], 
                          protected_attribute_names=['sex', 'race'], 
                          privileged_classes=[['Female'], ['Caucasian']], 
                          categorical_features=['age_cat', 'c_charge_degree', 'c_charge_desc'], 
                          features_to_keep=['sex', 'age', 'age_cat', 'race', 
                                            'juv_fel_count', 'juv_misd_count', 'juv_other_count', 
                                            'priors_count', 'c_charge_degree', 'c_charge_desc', 
                                            'two_year_recid'], 
                          custom_preprocessing=default_preprocessing)

# Split into training and test sets
train, test = dataset.split([0.7], shuffle=True)

# Identify protected attribute
protected_attr = 'race'
privileged_groups = [{'race': 1}]  # White
unprivileged_groups = [{'race': 0}]  # Black




In [10]:
# Compute fairness metrics 
# Basic dataset metric
metric_train = BinaryLabelDatasetMetric(train,
                                        unprivileged_groups=unprivileged_groups,
                                        privileged_groups=privileged_groups)

print("Mean difference (favours white if positive):", metric_train.mean_difference())


Mean difference (favours white if positive): -0.08551074229388411


In [11]:
# Apply reweighing to mitigate bias
RW = Reweighing(unprivileged_groups=unprivileged_groups,
                privileged_groups=privileged_groups)
train_transf = RW.fit_transform(train)

# Compute fairness metrics after reweighing
metric_transf = BinaryLabelDatasetMetric(train_transf,
                                         unprivileged_groups=unprivileged_groups,
                                         privileged_groups=privileged_groups)
print("Mean difference after reweighing:", metric_transf.mean_difference())


Mean difference after reweighing: -1.1102230246251565e-16


In [12]:
# visualize Bias
# Plot false positive rates by race
fpr_by_race = train.labels.ravel()
race_groups = train.protected_attributes.ravel()
plt.figure(figsize=(6,4))
plt.bar(['Black', 'White'], [fpr_by_race[race_groups==0].mean(),
                             fpr_by_race[race_groups==1].mean()],
        color=['red', 'blue'])
plt.ylabel('False Positive Rate')
plt.title('FPR by Race in COMPAS Dataset')
plt.show()


AttributeError: 'StandardDataset' object has no attribute 'label'