In [1]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, recall_score, f1_score
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import ClassificationMetric
from sklearn.model_selection import train_test_split
from aif360.algorithms.postprocessing import EqOddsPostprocessing
from sklearn.pipeline import Pipeline
from aif360.algorithms.postprocessing import DeterministicReranking

  warn_deprecated('vmap', 'torch.vmap')


In [2]:
def compute_performance_metrics(y_test, y_pred, model_name):
    accuracy = accuracy_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1_score_value = f1_score(y_test, y_pred)
    print(f"{model_name} Accuracy: {accuracy}")
    print(f"{model_name} Recall: {recall}")
    print(f"{model_name} F1 Score: {f1_score_value}")

def compute_classification_metric(dataset,predictions, label_name_v, favorable_label_v, unfavorable_label_v, privileged_attribute, unprivileged_attribute):
    features = [privileged_attribute] + unprivileged_attribute # We want to check the fairness level regarding the protected attribute "sex"

    # This is the object made of the original dataset
    aif_sex_dataset = BinaryLabelDataset( # Base class for all structured datasets with binary labels.
            df=dataset,
            favorable_label=favorable_label_v, # This means that a prediction is biased toward the privileged attribute if its value is 1 (True)
            unfavorable_label=unfavorable_label_v,
            label_names=[label_name_v],
            protected_attribute_names=features,
            privileged_protected_attributes=[privileged_attribute], # here we tell AIF that we want to check for predictions
        )

    # We do the same thing but with the predictions dataset
    aif_sex_pred = BinaryLabelDataset(
            df=predictions,
            favorable_label=favorable_label_v,
            unfavorable_label=unfavorable_label_v,
            label_names=[label_name_v],
            protected_attribute_names=features,
            privileged_protected_attributes=[privileged_attribute],
        )

    sex_privileged_group = [{privileged_attribute: 1, **{attr: 0 for attr in unprivileged_attribute}}]
    sex_unprivileged_group = [{attr: 1, privileged_attribute: 0} for attr in unprivileged_attribute]

    # We provide the ClassificationMetric object with all the information needed:
    # aif_sex_dataset - The original test set
    # aif_sex_pred - A dataset containing the predictions of the model
    # sex_privileged_group - The privileged group
    # sex_unprivileged_group - The unprivileged group
    fairness_metrics = ClassificationMetric(dataset=aif_sex_dataset,
                            classified_dataset=aif_sex_pred,
                            unprivileged_groups=sex_unprivileged_group,
                            privileged_groups=sex_privileged_group)
    
    return fairness_metrics

def compute_fairness_metrics(fairness_metrics: ClassificationMetric):
    # Values less than 0 indicate that privileged group has higher
    # proportion of predicted positive outcomes than unprivileged group.
    # Value higher than 0 indicates that unprivileged group has higher proportion
    # of predicted positive outcomes than privileged group.
    SPD = round(fairness_metrics.statistical_parity_difference(),3)

    # Measures the deviation from the equality of opportunity, which means that the same
    # proportion of each population receives the favorable outcome. This measure must be equal to 0 to be fair.
    EOD = round(fairness_metrics.equal_opportunity_difference(),3)

    # Average of difference in False Positive Rate and True Positive Rate for unprivileged and privileged groups
    # A value of 0 indicates equality of odds, which means that samples in both the privileged and unprivileged
    # groups have the same probability of being classified positively.
    AOD = round(fairness_metrics.average_odds_difference(),3)

    print(f"Statistical Parity Difference (SPD): {SPD}")
    print(f"Average Odds Difference (AOD): {AOD}")
    print(f"Equal Opportunity Difference (EOD): {EOD}")

In [3]:
dataset_path = "trained_compas-score.csv"
df_raw = pd.read_csv(dataset_path) 

pd.set_option('display.max_columns', None)

In [4]:
X = df_raw.drop(columns="is_recid")
y = df_raw["is_recid"]

# Define four sets and apply the function
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2, # 0.2 indicates a test set size of 20%
                                                    random_state=42)

In [5]:
sex_features = ["sex_Male", "sex_Female"]
race_features = ["race_Caucasian", "race_African-American", "race_Asian", "race_Other", "race_Native American", "race_Hispanic"]

race_privileged = "race_Caucasian"
race_unprivileged = ["race_African-American", "race_Asian", "race_Other", "race_Native American", "race_Hispanic"]
sex_privileged = "sex_Female"
sex_unprivileged = ["sex_Male"]

In [6]:
# Add your functions here
# compute_performance_metrics and compute_classification_metric
# Add the definition for compute_fairness_metrics

# Train the Decision Tree classifier
dt_clf = DecisionTreeClassifier(random_state=42)
dt_clf.fit(X_train, y_train)

# Predizione sui dati di test
dt_predictions = dt_clf.predict(X_test)

# Calcolo delle metriche di performance per il Decision Tree grezzo
compute_performance_metrics(y_test, dt_predictions, "Decision Tree")

# Aggiunta della colonna del target al dataframe di X_test e rimozione di valori mancanti
dataset = X_test.copy(deep=True)
dataset['is_recid'] = y_test
dataset = dataset.dropna()

# Conversione dei dati in formato BinaryLabelDataset
test_dataset = BinaryLabelDataset(df=dataset, label_names=['is_recid'], protected_attribute_names=sex_features)

# Creazione di un dataset di predizioni per il post-processing
predictions = dataset.copy(deep=True)
predictions['is_recid'] = dt_predictions
predictions = predictions.dropna()

test_pred_dataset = BinaryLabelDataset(df=predictions, label_names=['is_recid'], protected_attribute_names=sex_features)

# Applicazione del post-processing con EqOddsPostprocessing
eq_odds = EqOddsPostprocessing(privileged_groups=[{'sex_Female': 1}], unprivileged_groups=[{'sex_Female': 0}])
eq_odds = eq_odds.fit(test_dataset, test_pred_dataset)

# Fai delle predizioni post-processate
eq_odds_pred = eq_odds.predict(test_pred_dataset)
eq_odds_labels = eq_odds_pred.labels

# Calcolo delle metriche di performance per il modello post-processato
compute_performance_metrics(y_test, eq_odds_labels, "Decision Tree Post-processed")

Decision Tree Accuracy: 0.6292325056433409
Decision Tree Recall: 0.4538087520259319
Decision Tree F1 Score: 0.4601479046836483
Decision Tree Post-processed Accuracy: 0.6275395033860045
Decision Tree Post-processed Recall: 0.45705024311183146
Decision Tree Post-processed F1 Score: 0.46078431372549017


In [7]:
# Creazione del dataset di test con le etichette originali e predizioni del modello
test_dataset_with_labels = X_test.copy(deep=True)
test_dataset_with_labels['is_recid'] = y_test

# Creazione del dataset di predizioni del modello post-processato
predictions_post_processed = X_test.copy(deep=True)
predictions_post_processed['is_recid'] = eq_odds_labels

In [8]:
# Calcolo delle metriche di fairness
fairness_metrics = compute_classification_metric(test_dataset_with_labels,predictions_post_processed,'is_recid',0,1,sex_privileged,sex_unprivileged)
compute_fairness_metrics(fairness_metrics)

Statistical Parity Difference (SPD): -0.015
Average Odds Difference (AOD): 0.002
Equal Opportunity Difference (EOD): -0.004


In [9]:
svm_classifier = make_pipeline(StandardScaler(), SVC(kernel='linear'))

# Addestra il classificatore sui dati di training
svm_classifier.fit(X_train, y_train)

# After the training phase, the model will be tested by predicting the values on the test set
svm_scores = svm_classifier.decision_function(X_test)
svm_prediction = svm_classifier.predict(X_test)

compute_performance_metrics(y_test,svm_prediction,"SVM")

# Aggiunta della colonna del target al dataframe di X_test e rimozione di valori mancanti
dataset = X_test.copy(deep=True)
dataset['is_recid'] = y_test
dataset = dataset.dropna()

# Conversione dei dati in formato BinaryLabelDataset
test_dataset = BinaryLabelDataset(df=dataset, label_names=['is_recid'], protected_attribute_names=sex_features)

# Creazione di un dataset di predizioni per il post-processing
predictions = dataset.copy(deep=True)
predictions['is_recid'] = svm_prediction
predictions = predictions.dropna()

test_pred_dataset = BinaryLabelDataset(df=predictions, label_names=['is_recid'], protected_attribute_names=sex_features)

# Applicazione del post-processing con EqOddsPostprocessing
eq_odds = EqOddsPostprocessing(privileged_groups=[{'sex_Female': 1}], unprivileged_groups=[{'sex_Female': 0}])
eq_odds = eq_odds.fit(test_dataset, test_pred_dataset)

# Fai delle predizioni post-processate
eq_odds_pred = eq_odds.predict(test_pred_dataset)
eq_odds_labels = eq_odds_pred.labels

# Calcolo delle metriche di performance per il modello post-processato
compute_performance_metrics(y_test, eq_odds_labels, "SVM Post-processed")

SVM Accuracy: 0.7048532731376975
SVM Recall: 0.29497568881685576
SVM F1 Score: 0.41037204058624577
SVM Post-processed Accuracy: 0.6766365688487584
SVM Post-processed Recall: 0.13452188006482982
SVM Post-processed F1 Score: 0.22462787550744248


In [10]:
# Creazione del dataset di test con le etichette originali e predizioni del modello
test_dataset_with_labels = X_test.copy(deep=True)
test_dataset_with_labels['is_recid'] = y_test

# Creazione del dataset di predizioni del modello post-processato
predictions_post_processed = X_test.copy(deep=True)
predictions_post_processed['is_recid'] = eq_odds_labels

# Calcolo delle metriche di fairness
fairness_metrics = compute_classification_metric(test_dataset_with_labels,predictions_post_processed,'is_recid',0,1,sex_privileged,sex_unprivileged)
compute_fairness_metrics(fairness_metrics)

Statistical Parity Difference (SPD): -0.01
Average Odds Difference (AOD): -0.005
Equal Opportunity Difference (EOD): 0.003


In [11]:
rf_classifier = RandomForestClassifier(n_estimators=100, criterion='gini', max_depth = None, random_state=42)

rf_classifier.fit(X_train, y_train)

rf_predictions = rf_classifier.predict(X_test)

compute_performance_metrics(y_test,rf_predictions,"Random Forest")

# Aggiunta della colonna del target al dataframe di X_test e rimozione di valori mancanti
dataset = X_test.copy(deep=True)
dataset['is_recid'] = y_test
dataset = dataset.dropna()

# Conversione dei dati in formato BinaryLabelDataset
test_dataset = BinaryLabelDataset(df=dataset, label_names=['is_recid'], protected_attribute_names=sex_features)

# Creazione di un dataset di predizioni per il post-processing
predictions = dataset.copy(deep=True)
predictions['is_recid'] = rf_predictions
predictions = predictions.dropna()

test_pred_dataset = BinaryLabelDataset(df=predictions, label_names=['is_recid'], protected_attribute_names=sex_features)


# Applicazione del post-processing con EqOddsPostprocessing
eq_odds = EqOddsPostprocessing(privileged_groups=[{'sex_Female': 1}], unprivileged_groups=[{'sex_Female': 0}])
eq_odds = eq_odds.fit(test_dataset, test_pred_dataset)

# Fai delle predizioni post-processate
eq_odds_pred = eq_odds.predict(test_pred_dataset)
eq_odds_labels = eq_odds_pred.labels

# Calcolo delle metriche di performance per il modello post-processato
compute_performance_metrics(y_test, eq_odds_labels, "Random Forest Post-processed")

Random Forest Accuracy: 0.7059819413092551
Random Forest Recall: 0.3841166936790924
Random Forest F1 Score: 0.4763819095477387
Random Forest Post-processed Accuracy: 0.6721218961625283
Random Forest Post-processed Recall: 0.16531604538087522
Random Forest Post-processed F1 Score: 0.25987261146496815


In [12]:
# Creazione del dataset di test con le etichette originali e predizioni del modello
test_dataset_with_labels = X_test.copy(deep=True)
test_dataset_with_labels['is_recid'] = y_test

# Creazione del dataset di predizioni del modello post-processato
predictions_post_processed = X_test.copy(deep=True)
predictions_post_processed['is_recid'] = eq_odds_labels

# Calcolo delle metriche di fairness
fairness_metrics = compute_classification_metric(test_dataset_with_labels,predictions_post_processed,'is_recid',0,1,sex_privileged,sex_unprivileged)
compute_fairness_metrics(fairness_metrics)

Statistical Parity Difference (SPD): -0.01
Average Odds Difference (AOD): -0.001
Equal Opportunity Difference (EOD): -0.001


In [13]:
race_privileged_group = [{race_privileged: 1, **{attr: 0 for attr in race_unprivileged}}]
race_unprivileged_group = [{attr: 1, race_privileged: 0} for attr in race_unprivileged]

print(race_privileged_group)
print(race_unprivileged_group)

[{'race_Caucasian': 1, 'race_African-American': 0, 'race_Asian': 0, 'race_Other': 0, 'race_Native American': 0, 'race_Hispanic': 0}]
[{'race_African-American': 1, 'race_Caucasian': 0}, {'race_Asian': 1, 'race_Caucasian': 0}, {'race_Other': 1, 'race_Caucasian': 0}, {'race_Native American': 1, 'race_Caucasian': 0}, {'race_Hispanic': 1, 'race_Caucasian': 0}]


In [14]:
# Add your functions here
# compute_performance_metrics and compute_classification_metric
# Add the definition for compute_fairness_metrics

# Train the Decision Tree classifier
dt_clf = DecisionTreeClassifier(random_state=42)
dt_clf.fit(X_train, y_train)

# Predizione sui dati di test
dt_predictions = dt_clf.predict(X_test)

# Calcolo delle metriche di performance per il Decision Tree grezzo
compute_performance_metrics(y_test, dt_predictions, "Decision Tree")

# Aggiunta della colonna del target al dataframe di X_test e rimozione di valori mancanti
dataset = X_test.copy(deep=True)
dataset['is_recid'] = y_test
dataset = dataset.dropna()

# Conversione dei dati in formato BinaryLabelDataset
test_dataset = BinaryLabelDataset(df=dataset, label_names=['is_recid'], protected_attribute_names=race_features)

# Creazione di un dataset di predizioni per il post-processing
predictions = dataset.copy(deep=True)
predictions['is_recid'] = dt_predictions
predictions = predictions.dropna()

test_pred_dataset = BinaryLabelDataset(df=predictions, label_names=['is_recid'], protected_attribute_names=race_features)

# Applicazione del post-processing con EqOddsPostprocessing
eq_odds = EqOddsPostprocessing(privileged_groups=race_privileged_group, unprivileged_groups=race_unprivileged_group)
eq_odds = eq_odds.fit(test_dataset, test_pred_dataset)

# Fai delle predizioni post-processate
eq_odds_pred = eq_odds.predict(test_pred_dataset)
eq_odds_labels = eq_odds_pred.labels

# Calcolo delle metriche di performance per il modello post-processato
compute_performance_metrics(y_test, eq_odds_labels, "Decision Tree Post-processed")

Decision Tree Accuracy: 0.6292325056433409
Decision Tree Recall: 0.4538087520259319
Decision Tree F1 Score: 0.4601479046836483
Decision Tree Post-processed Accuracy: 0.6100451467268623
Decision Tree Post-processed Recall: 0.3549432739059968
Decision Tree Post-processed F1 Score: 0.3879539415411869


In [15]:
# Creazione del dataset di test con le etichette originali e predizioni del modello
test_dataset_with_labels = X_test.copy(deep=True)
test_dataset_with_labels['is_recid'] = y_test

# Creazione del dataset di predizioni del modello post-processato
predictions_post_processed = X_test.copy(deep=True)
predictions_post_processed['is_recid'] = eq_odds_labels

# Calcolo delle metriche di fairness
fairness_metrics = compute_classification_metric(test_dataset_with_labels,predictions_post_processed,'is_recid',0,1,race_privileged,race_unprivileged)
compute_fairness_metrics(fairness_metrics)

Statistical Parity Difference (SPD): -0.01
Average Odds Difference (AOD): 0.001
Equal Opportunity Difference (EOD): -0.003


In [16]:
svm_classifier = make_pipeline(StandardScaler(), SVC(kernel='linear'))

# Addestra il classificatore sui dati di training
svm_classifier.fit(X_train, y_train)

# After the training phase, the model will be tested by predicting the values on the test set
svm_scores = svm_classifier.decision_function(X_test)
svm_prediction = svm_classifier.predict(X_test)

compute_performance_metrics(y_test,svm_prediction,"SVM")

# Aggiunta della colonna del target al dataframe di X_test e rimozione di valori mancanti
dataset = X_test.copy(deep=True)
dataset['is_recid'] = y_test
dataset = dataset.dropna()

# Conversione dei dati in formato BinaryLabelDataset
test_dataset = BinaryLabelDataset(df=dataset, label_names=['is_recid'], protected_attribute_names=race_features)

# Creazione di un dataset di predizioni per il post-processing
predictions = dataset.copy(deep=True)
predictions['is_recid'] = svm_prediction
predictions = predictions.dropna()

test_pred_dataset = BinaryLabelDataset(df=predictions, label_names=['is_recid'], protected_attribute_names=race_features)

# Applicazione del post-processing con EqOddsPostprocessing
eq_odds = EqOddsPostprocessing(privileged_groups=race_privileged_group, unprivileged_groups=race_unprivileged_group)
eq_odds = eq_odds.fit(test_dataset, test_pred_dataset)

# Fai delle predizioni post-processate
eq_odds_pred = eq_odds.predict(test_pred_dataset)
eq_odds_labels = eq_odds_pred.labels

# Calcolo delle metriche di performance per il modello post-processato
compute_performance_metrics(y_test, eq_odds_labels, "SVM Post-processed")

SVM Accuracy: 0.7048532731376975
SVM Recall: 0.29497568881685576
SVM F1 Score: 0.41037204058624577
SVM Post-processed Accuracy: 0.6783295711060948
SVM Post-processed Recall: 0.1912479740680713
SVM Post-processed F1 Score: 0.29280397022332505


In [17]:
# Creazione del dataset di test con le etichette originali e predizioni del modello
test_dataset_with_labels = X_test.copy(deep=True)
test_dataset_with_labels['is_recid'] = y_test

# Creazione del dataset di predizioni del modello post-processato
predictions_post_processed = X_test.copy(deep=True)
predictions_post_processed['is_recid'] = eq_odds_labels

# Calcolo delle metriche di fairness
fairness_metrics = compute_classification_metric(test_dataset_with_labels,predictions_post_processed,'is_recid',0,1,race_privileged,race_unprivileged)
compute_fairness_metrics(fairness_metrics)

Statistical Parity Difference (SPD): -0.014
Average Odds Difference (AOD): -0.001
Equal Opportunity Difference (EOD): -0.004


In [18]:
rf_classifier = RandomForestClassifier(n_estimators=100, criterion='gini', max_depth = None, random_state=42)

rf_classifier.fit(X_train, y_train)

rf_predictions = rf_classifier.predict(X_test)

compute_performance_metrics(y_test,rf_predictions,"Random Forest")

# Aggiunta della colonna del target al dataframe di X_test e rimozione di valori mancanti
dataset = X_test.copy(deep=True)
dataset['is_recid'] = y_test
dataset = dataset.dropna()

# Conversione dei dati in formato BinaryLabelDataset
test_dataset = BinaryLabelDataset(df=dataset, label_names=['is_recid'], protected_attribute_names=race_features)

# Creazione di un dataset di predizioni per il post-processing
predictions = dataset.copy(deep=True)
predictions['is_recid'] = rf_predictions
predictions = predictions.dropna()

test_pred_dataset = BinaryLabelDataset(df=predictions, label_names=['is_recid'], protected_attribute_names=race_features)


# Applicazione del post-processing con EqOddsPostprocessing
eq_odds = EqOddsPostprocessing(privileged_groups=race_privileged_group, unprivileged_groups=race_unprivileged_group)
eq_odds = eq_odds.fit(test_dataset, test_pred_dataset)

# Fai delle predizioni post-processate
eq_odds_pred = eq_odds.predict(test_pred_dataset)
eq_odds_labels = eq_odds_pred.labels

# Calcolo delle metriche di performance per il modello post-processato
compute_performance_metrics(y_test, eq_odds_labels, "Random Forest Post-processed")

Random Forest Accuracy: 0.7059819413092551
Random Forest Recall: 0.3841166936790924
Random Forest F1 Score: 0.4763819095477387
Random Forest Post-processed Accuracy: 0.6834085778781038
Random Forest Post-processed Recall: 0.23014586709886548
Random Forest Post-processed F1 Score: 0.336094674556213


In [19]:
# Creazione del dataset di test con le etichette originali e predizioni del modello
test_dataset_with_labels = X_test.copy(deep=True)
test_dataset_with_labels['is_recid'] = y_test

# Creazione del dataset di predizioni del modello post-processato
predictions_post_processed = X_test.copy(deep=True)
predictions_post_processed['is_recid'] = eq_odds_labels

# Calcolo delle metriche di fairness
fairness_metrics = compute_classification_metric(test_dataset_with_labels,predictions_post_processed,'is_recid',0,1,race_privileged,race_unprivileged)
compute_fairness_metrics(fairness_metrics)

Statistical Parity Difference (SPD): -0.014
Average Odds Difference (AOD): 0.002
Equal Opportunity Difference (EOD): -0.004
