In [16]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, recall_score, f1_score
from aif360.datasets import BinaryLabelDataset
from aif360.datasets import StandardDataset
from aif360.metrics import ClassificationMetric
from sklearn.model_selection import train_test_split


In [6]:
dataset_path = 'datasets/compas-scores.csv'
df_raw = pd.read_csv(dataset_path) 

In [7]:
df_raw.shape

(11757, 47)

In [8]:
df_raw.columns

Index(['id', 'name', 'first', 'last', 'compas_screening_date', 'sex', 'dob',
       'age', 'age_cat', 'race', 'juv_fel_count', 'decile_score',
       'juv_misd_count', 'juv_other_count', 'priors_count',
       'days_b_screening_arrest', 'c_jail_in', 'c_jail_out', 'c_case_number',
       'c_offense_date', 'c_arrest_date', 'c_days_from_compas',
       'c_charge_degree', 'c_charge_desc', 'is_recid', 'num_r_cases',
       'r_case_number', 'r_charge_degree', 'r_days_from_arrest',
       'r_offense_date', 'r_charge_desc', 'r_jail_in', 'r_jail_out',
       'is_violent_recid', 'num_vr_cases', 'vr_case_number',
       'vr_charge_degree', 'vr_offense_date', 'vr_charge_desc',
       'v_type_of_assessment', 'v_decile_score', 'v_score_text',
       'v_screening_date', 'type_of_assessment', 'decile_score.1',
       'score_text', 'screening_date'],
      dtype='object')

In [9]:
df_raw.head()

Unnamed: 0,id,name,first,last,compas_screening_date,sex,dob,age,age_cat,race,...,vr_offense_date,vr_charge_desc,v_type_of_assessment,v_decile_score,v_score_text,v_screening_date,type_of_assessment,decile_score.1,score_text,screening_date
0,1,miguel hernandez,miguel,hernandez,2013-08-14,Male,1947-04-18,69,Greater than 45,Other,...,,,Risk of Violence,1,Low,2013-08-14,Risk of Recidivism,1,Low,2013-08-14
1,2,michael ryan,michael,ryan,2014-12-31,Male,1985-02-06,31,25 - 45,Caucasian,...,,,Risk of Violence,2,Low,2014-12-31,Risk of Recidivism,5,Medium,2014-12-31
2,3,kevon dixon,kevon,dixon,2013-01-27,Male,1982-01-22,34,25 - 45,African-American,...,2013-07-05,Felony Battery (Dom Strang),Risk of Violence,1,Low,2013-01-27,Risk of Recidivism,3,Low,2013-01-27
3,4,ed philo,ed,philo,2013-04-14,Male,1991-05-14,24,Less than 25,African-American,...,,,Risk of Violence,3,Low,2013-04-14,Risk of Recidivism,4,Low,2013-04-14
4,5,marcu brown,marcu,brown,2013-01-13,Male,1993-01-21,23,Less than 25,African-American,...,,,Risk of Violence,6,Medium,2013-01-13,Risk of Recidivism,8,High,2013-01-13


In [10]:
df_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11757 entries, 0 to 11756
Data columns (total 47 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   id                       11757 non-null  int64  
 1   name                     11757 non-null  object 
 2   first                    11757 non-null  object 
 3   last                     11757 non-null  object 
 4   compas_screening_date    11757 non-null  object 
 5   sex                      11757 non-null  object 
 6   dob                      11757 non-null  object 
 7   age                      11757 non-null  int64  
 8   age_cat                  11757 non-null  object 
 9   race                     11757 non-null  object 
 10  juv_fel_count            11757 non-null  int64  
 11  decile_score             11757 non-null  int64  
 12  juv_misd_count           11757 non-null  int64  
 13  juv_other_count          11757 non-null  int64  
 14  priors_count          

In [11]:
columns_to_drop = ['num_r_cases',
'r_case_number',
'r_charge_degree',
'r_days_from_arrest',
'r_offense_date',
'r_charge_desc',
'r_jail_in',
'r_jail_out',
'is_violent_recid',
'num_vr_cases',
'vr_case_number',
'vr_charge_degree',
'vr_offense_date',
'vr_charge_desc',
'v_type_of_assessment',
'v_decile_score',
'v_score_text',
'v_screening_date',
'c_arrest_date',
'decile_score.1',
'screening_date',
'id',
'compas_screening_date',
'type_of_assessment',
'dob',
'name',
'first',
'last',
'score_text',
'juv_fel_count',
'juv_misd_count',
'juv_other_count',
'c_jail_in',
'c_jail_out',
'c_offense_date',
'c_case_number']

df_raw = df_raw.drop(columns=columns_to_drop)
df_raw = df_raw.dropna()


In [12]:
categorical_columns = ["sex", "age_cat", "race", "c_charge_degree", "c_charge_desc"]
df_raw = pd.get_dummies(df_raw, columns=categorical_columns)

In [17]:
# Remember to use the SMOTE X and y
X = df_raw.drop(columns="is_recid")
y = df_raw["is_recid"]

# Define four sets and apply the function
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2, # 0.2 indicates a test set size of 20%
                                                    random_state=42)

In [14]:
dt_clf = DecisionTreeClassifier(random_state=42)

# The fit function will do the trick
dt_clf.fit(X_train, y_train)

# After the training phase, the model will be tested by predicting the values on the test set
dt_predictions = dt_clf.predict(X_test)

dt_accuracy = accuracy_score(y_test, dt_predictions)
dt_recall = recall_score(y_test, dt_predictions)
dt_f1_score = f1_score(y_test, dt_predictions)
print(f"Decision Tree Accuracy: {dt_accuracy}")
print(f"Decision Tree Recall: {dt_recall}")
print(f"Decision Tree F1 Score: {dt_f1_score}")

Decision Tree Accuracy: 0.6263008514664143
Decision Tree Recall: 0.4092827004219409
Decision Tree F1 Score: 0.42419825072886297


In [15]:
sex_features = ['sex_Male','sex_Female'] # We want to check the fairness level regarding the protected attribute "sex"


dataset = X_test.copy(deep=True) # we create a copy of the test set
dataset['is_recid'] = y_test  # and join the target feature with the others

predictions = dataset.copy(deep=True) # we do the same task
predictions['is_recid'] = dt_predictions # but this time the target feature is made by the predictions of our model

# In this way, we have two datasets. One (dataset) is the original test set containing the original values of features,
# the other (predictions) contains the original values except for the target one, that is now made of model's predictions

# These will be used by AIF to compare the classifications of the model with the original values to
# understand if the model's answers create favouritism toward the privileged attribute


# This is the object made of the original dataset
aif_sex_dataset = BinaryLabelDataset( # Base class for all structured datasets with binary labels.
        df=dataset,
        favorable_label=1, # This means that a prediction is biased toward the privileged attribute if its value is 1 (True)
        unfavorable_label=0,
        label_names=['is_recid'],
        protected_attribute_names=sex_features,
        privileged_protected_attributes=['sex_Male'], # here we tell AIF that we want to check for predictions
                                                      # that somehow privilege the attribute "sex_Male"
    )

# We do the same thing but with the predictions dataset
aif_sex_pred = BinaryLabelDataset(
        df=predictions,
        favorable_label=1,
        unfavorable_label=0,
        label_names=['is_recid'],
        protected_attribute_names=sex_features,
        privileged_protected_attributes=['sex_Male'],
    )

sex_privileged_group = [{'sex_Male': 1, 'sex_Female': 0}] # The privileged group is made of males (sex_Male = True)
sex_unprivileged_group = [{'sex_Female': 1, 'sex_Male': 0}] # The unprivileged group is made of females (sex_Female = True)

# We provide the ClassificationMetric object with all the information needed:
# aif_sex_dataset - The original test set
# aif_sex_pred - A dataset containing the predictions of the model
# sex_privileged_group - The privileged group
# sex_unprivileged_group - The unprivileged group
fairness_metrics = ClassificationMetric(dataset=aif_sex_dataset,
                               classified_dataset=aif_sex_pred,
                               unprivileged_groups=sex_unprivileged_group,
                               privileged_groups=sex_privileged_group)

# Values less than 0 indicate that privileged group has higher
# proportion of predicted positive outcomes than unprivileged group.
# Value higher than 0 indicates that unprivileged group has higher proportion
# of predicted positive outcomes than privileged group.
SPD = round(fairness_metrics.statistical_parity_difference(),3)

# Measures the deviation from the equality of opportunity, which means that the same
# proportion of each population receives the favorable outcome. This measure must be equal to 0 to be fair.
EOD = round(fairness_metrics.equal_opportunity_difference(),3)

# Average of difference in False Positive Rate and True Positive Rate for unprivileged and privileged groups
# A value of 0 indicates equality of odds, which means that samples in both the privileged and unprivileged
# groups have the same probability of being classified positively.
AOD = round(fairness_metrics.average_odds_difference(),3)

print(f"Statistical Parity Difference (SPD): {SPD}")
print(f"Equal Opportunity Difference (EOD): {EOD}")
print(f"Average Odds Difference: {AOD}")

Statistical Parity Difference (SPD): -0.114
Equal Opportunity Difference (EOD): -0.207
Average Odds Difference: -0.133
