In [None]:
import numpy as np
%load_ext autoreload
%autoreload 2
import pandas as pd
from helpers.training import *
from helpers.visualizations import *
from helpers.preprocessing import *
from helpers.aequitas_methods import *
from helpers.mitigation_methods import *
from aequitas.flow.methods.inprocessing import FairlearnClassifier
from sklearn.metrics import classification_report
from IPython.display import display, HTML
from aequitas import Audit
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import openml
import warnings
import math
from helpers.adult_dataset_preps import *
warnings.filterwarnings(action='ignore')

In [None]:
# Load the dataset through the project helper; the label column is named "target".
df = initial_dataset_preprocess()
target = "target"
# Median of the raw target values; categorize_target reads this global as its cut-off.
median = df[target].median()
print("median value:", median)

def categorize_target(target, threshold=None):
    """Map a raw target value to a binary class around a threshold.

    Args:
        target: The raw (numeric) target value of one row.
        threshold: Cut-off value. When omitted, the notebook-level ``median``
            is used, which keeps ``Series.apply(categorize_target)`` working
            exactly as before.

    Returns:
        0 if ``target`` is below the threshold, 1 if it is greater than or
        equal to it, and ``None`` when neither comparison holds (e.g. NaN).
    """
    if threshold is None:
        # Fall back to the global computed from the dataset in this cell.
        threshold = median
    if target < threshold:
        return 0
    if target >= threshold:
        return 1
    # Neither comparison is true (e.g. NaN input); previously an implicit None.
    return None
# Replace the raw target with its 0/1 class (below the median -> 0, otherwise 1).
df["target"] = df["target"].apply(categorize_target)
# Sensitive columns that are plotted and audited in the cells that follow.
protected_attributes = ["age","race","marital status","sex"]

In [None]:
# Visualise the protected attributes of the base dataset; num_rows/num_cols presumably
# lay the four plots out as a 2x2 grid (behaviour is defined in helpers — confirm).
plot_attributes(df,protected_attributes, target, num_rows=2, num_cols=2)

In [None]:

# Result of the project's encode_and_scale helper applied to the base dataset.
# NOTE(review): df_num is not referenced again in the visible notebook — confirm it is still needed.
df_num = encode_and_scale(df,target)
# Cast these two protected attributes to int on the base frame itself.
df['marital status'] = df['marital status'].astype(int)
df['sex'] = df['sex'].astype(int)

'''
Protected attributes: race, marital status, age and sex. Create a new dataset with new columns containing the pairwise intersections of these attributes, then convert them to numerical values.
'''
# Build the intersectional dataset: one string column per pair of protected
# attributes ("<left value> <right value>"), then drop the single attributes.
df_intersect = df.copy()
# (new column, left attribute, right attribute); list order fixes the column order.
_intersection_specs = [
    ("race/maritalstatus", "race", "marital status"),
    ("race/age", "race", "age"),
    ("maritalstatus/age", "marital status", "age"),
    ("sex/race", "sex", "race"),
    ("sex/age", "sex", "age"),
    ("sex/maritalstatus", "sex", "marital status"),
]
for _new_col, _left, _right in _intersection_specs:
    df_intersect[_new_col] = df_intersect[_left].astype(str) + " " + df_intersect[_right].astype(str)
df_intersect = df_intersect.drop(columns=["race", "marital status", "age", "sex"])

# Encode/scale the intersectional frame with the same helper used for the base data.
df_num_int = encode_and_scale(df_intersect, target)
protected_attributes_int = [_name for _name, _, _ in _intersection_specs]

In [None]:
def split_and_train(data, attributes):
    """Split ``data`` 80/20, fit each baseline model and collect its results.

    Args:
        data: DataFrame containing the feature columns and the column named
            by the global ``target``.
        attributes: Protected attribute column names forwarded to
            ``calc_metrics``.

    Returns:
        Tuple ``(x_test, y_test, y_predicted_dict, metrics_dict)`` where both
        dicts are keyed by model name ("Catboost", "LogisticRegression",
        "RandomForest", "DecisionTree").
    """
    y = data.loc[:, target]
    x = data.drop(target, axis=1)
    # Fixed random_state so every call on the same data yields the same split.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=0)
    # A tuple rather than a set: set iteration order of strings changes between
    # interpreter sessions, which made the training/report order non-reproducible.
    models = (
        "Catboost",
        "LogisticRegression",
        "RandomForest",
        "DecisionTree",
    )
    y_predicted_dict = {}
    metrics_dict = {}
    for m in models:
        model = choose_model(m, x_train, y_train)
        y_predicted = evaluate_model(model, x_train, x_test, y_train, y_test)
        attribute_metrics = calc_metrics(x_test=x_test, y_test=y_test, y_predicted=y_predicted, attributes=attributes, target=target)
        metrics_dict[m] = attribute_metrics
        y_predicted_dict[m] = y_predicted
        print(f"Classification report for model: {model} : \n {classification_report(y_test, y_predicted)}")
        # plot_roc_curve(y_true=y_test, y_pred=y_predicted, model_name=m)
    return x_test, y_test, y_predicted_dict, metrics_dict

def plot_audit(dataset, attributes):
    """Show the Aequitas summary plot for the given attributes.

    ``dataset`` must contain the columns 'score' and 'label_value' plus every
    column listed in ``attributes``; only those columns are handed to ``Audit``.
    """
    audit_columns = ['score','label_value'] + attributes
    metrics_to_plot = ["tpr","fpr","fnr","tnr","pprev"]
    Audit(dataset[audit_columns]).summary_plot(metrics_to_plot).show()

In [None]:
"""
Train and calculate fairness metrics for original dataset.
"""
# NOTE(review): this trains on `df`, not on the `df_num` returned by encode_and_scale
# in the earlier cell — confirm that is intended.
x_test, y_test, y_predicted_dict, metrics_dict = split_and_train(df,protected_attributes)

In [None]:
# Fairness report for the DecisionTree predictions on the original dataset;
# the returned frame is passed to plot_audit further down.
df_test = calc_fairness_report(x_test, y_test, y_predicted_dict['DecisionTree'], target, protected_attributes, display_disp=True)
# 
# calc_fairness_report(x_test, y_test, y_predicted_dict['DecisionTree'], target, protected_attributes, display_disp=True)
# calc_fairness_report(x_test, y_test, y_predicted_dict['LogisticRegression'], target, protected_attributes, display_disp=True)


In [None]:
"""
Train and calculate fairness metrics for the dataset obtained by the intersection of protected attributes.
"""
# Baseline run on the encoded intersectional dataset; metrics_dict_int is reused by the
# per-attribute comparison plots near the end of the notebook.
x_test_int, y_test_int, y_predicted_dict_int, metrics_dict_int = split_and_train(df_num_int,protected_attributes_int)

In [None]:
# Fairness report for the DecisionTree predictions on the intersectional dataset;
# df_test_int is the baseline frame every later plot_audit comparison uses.
df_test_int = calc_fairness_report(x_test_int, y_test_int, y_predicted_dict_int['DecisionTree'], target, protected_attributes_int,display_disp=True)

# Apply pre-processing bias mitigation methods

In [None]:
'''
1: Massaging method (Flips selected labels to reduce prevalence disparity between groups), train and calculate metrics.
'''

# Apply massaging once per intersectional attribute, feeding each pass's output into the next.
data_to_transform = df_num_int.copy()
for attr in protected_attributes_int:
    data_transformed_mess = pre_process_massaging(data_to_transform,attr,target)
    # Cast the attribute column back to int after the transformation.
    data_transformed_mess[attr] = data_transformed_mess[attr].astype(int)
    data_to_transform = data_transformed_mess.copy()

# data_transformed_m1 = pre_process_massaging(data_to_transform, "race/maritalstatus",target)
# data_transformed_m1["race/maritalstatus"] = data_transformed_m1["race/maritalstatus"].astype(int)
# data_transformed_m2 = pre_process_massaging(data_transformed_m1, "race/age",target)
# data_transformed_m2["race/age"] = data_transformed_m2["race/age"].astype(int)
# data_transformed_m3= pre_process_massaging(data_transformed_m2, "maritalstatus/age",target)
# data_transformed_m3["maritalstatus/age"] = data_transformed_m3["maritalstatus/age"].astype(int)

# Train on the transformed dataset.
# NOTE(review): the transformation was applied to the whole frame, and split_and_train only
# splits afterwards, so the held-out rows were transformed as well — confirm this is intended.
x_test_ms, y_test_ms, y_predicted_dict_transformed, metrics_ms_dict = split_and_train(data_to_transform,protected_attributes_int)


In [None]:
# Calculate the fairness report for the Catboost predictions on the massaged data.
# NOTE(review): the baseline frames df_test / df_test_int were built from DecisionTree
# predictions, so later comparisons mix models — confirm this is intended.
df_test_transformed =calc_fairness_report(x_test_ms, y_test_ms, y_predicted_dict_transformed["Catboost"],target,protected_attributes_int, display_disp=True)

In [None]:
# Compare disparities: original dataset, intersectional baseline, then massaged data.
# plot_audit selects the 'score' and 'label_value' columns, so each frame must carry them.
plot_audit(df_test, protected_attributes)
plot_audit(df_test_int, protected_attributes_int)
plot_audit(df_test_transformed, protected_attributes_int)

In [None]:
'''
2: Prevalence Sampling: Generates a training sample with controllable balanced prevalence for the groups in the dataset, either by undersampling or oversampling.
'''
# Apply prevalence sampling once per intersectional attribute, chaining the passes.
data_to_transform = df_num_int.copy()
for attr in protected_attributes_int:
    data_transformed_ps = pre_process_prev_sampling(data_to_transform,attr,target)
    # Cast the attribute column back to int after the transformation.
    data_transformed_ps[attr] = data_transformed_ps[attr].astype(int)
    data_to_transform = data_transformed_ps.copy()
# data_transformed_ps1 = pre_process_prev_sampling(data_to_transform, "race/maritalstatus",target)
# data_transformed_ps1["race/maritalstatus"] = data_transformed_ps1["race/maritalstatus"].astype(int)
# data_transformed_ps2 = pre_process_prev_sampling(data_transformed_ps1, "race/age",target)
# data_transformed_ps2["race/age"] = data_transformed_ps2["race/age"].astype(int)
# data_transformed_ps3= pre_process_prev_sampling(data_transformed_ps2, "maritalstatus/age",target)
# data_transformed_ps3["maritalstatus/age"] = data_transformed_ps3["maritalstatus/age"].astype(int)

# Train on the resampled dataset.
# NOTE(review): sampling was applied before split_and_train makes the train/test split,
# so the test rows come from the resampled data too — confirm this is intended.
x_test_ps, y_test_ps, y_predicted_dict_ps, metrics_ps_dict = split_and_train(data_to_transform,protected_attributes_int)

In [None]:
# Fairness report for the Catboost predictions on the prevalence-sampled data.
# NOTE(review): df_test_int (the baseline below) was built from DecisionTree predictions,
# so this comparison mixes models — confirm this is intended.
df_test_transformed_ps =calc_fairness_report(x_test_ps, y_test_ps, y_predicted_dict_ps["Catboost"],target,protected_attributes_int, display_disp=True)

# Compare disparities: intersectional baseline vs. prevalence-sampled data.
plot_audit(df_test_int, protected_attributes_int)
plot_audit(df_test_transformed_ps, protected_attributes_int)

In [None]:
# Compare the shape of the base frame with the frame from the last prevalence-sampling pass.
print(np.shape(df))
print(np.shape(data_transformed_ps))
# NOTE(review): the earlier plot_attributes call also passed target, num_rows and num_cols;
# this call relies on whatever defaults the helper defines — confirm they exist.
plot_attributes(data_transformed_ps,protected_attributes_int)

In [None]:
'''
3. Data repairer: Transforms the data distribution so that a given feature distribution is marginally independent of the sensitive attribute, s. 
'''

#transform data
data_to_transform = df_num_int.copy()
# Columns handed to the repairer: every column except the intersectional attributes and the target.
columns_to_change = df_num_int.columns.difference(protected_attributes_int).tolist()
columns_to_change.remove(target)

# Repair with respect to each attribute in turn, chaining the output of one pass into the next.
for attr in protected_attributes_int:
    data_transformed_dr = pre_process_data_repairer(data_to_transform,attr,target,columns_to_change)
    # Cast the attribute column back to int after the transformation.
    data_transformed_dr[attr] = data_transformed_dr[attr].astype(int)
    data_to_transform = data_transformed_dr.copy()
# data_transformed_dr1 = pre_process_data_repairer(data_to_transform, "race/age",target,columns_to_change)
# data_transformed_dr2 = pre_process_data_repairer(data_transformed_dr1, "race/maritalstatus",target,columns_to_change)
# data_transformed_dr3 = pre_process_data_repairer(data_transformed_dr2, "maritalstatus/age",target,columns_to_change)
# data_transformed_dr3["race/age"] = data_transformed_dr3["race/age"].astype(int)
# data_transformed_dr3["race/maritalstatus"] = data_transformed_dr3["race/maritalstatus"].astype(int)
# data_transformed_dr3["maritalstatus/age"] = data_transformed_dr3["maritalstatus/age"].astype(int)

# Train on the repaired dataset (the repair ran on the full frame; the split happens inside split_and_train).
x_test_dr, y_test_dr, y_predicted_dict_dr, metrics_dr_dict = split_and_train(data_to_transform,protected_attributes_int)

In [None]:
  
# Fairness report for the Catboost predictions on the repaired data.
# NOTE(review): unlike the other calc_fairness_report calls, display_disp is not passed here,
# so the helper's default applies — confirm this is intended.
df_test_transformed_dr =calc_fairness_report(x_test_dr, y_test_dr, y_predicted_dict_dr["Catboost"],target,protected_attributes_int)

# Compare disparities: intersectional baseline (DecisionTree) vs. repaired data (Catboost).
plot_audit(df_test_int, protected_attributes_int)
plot_audit(df_test_transformed_dr, protected_attributes_int)

In [None]:
'''
4. Label flipping
'''

#transform data
data_to_transform = df_num_int.copy()
# NOTE(review): columns_to_change is recomputed here but is not passed to
# pre_process_label_flip or used anywhere else in this cell — likely a copy-paste leftover.
columns_to_change = df_num_int.columns.difference(protected_attributes_int).tolist()
columns_to_change.remove(target)

# Apply label flipping once per intersectional attribute, chaining the passes.
for attr in protected_attributes_int:
    data_transformed_lf = pre_process_label_flip(data_to_transform,attr,target)
    # Cast the attribute column back to int after the transformation.
    data_transformed_lf[attr] = data_transformed_lf[attr].astype(int)
    data_to_transform = data_transformed_lf.copy()

# data_transformed_lf1 = pre_process_label_flip(data_to_transform, 'maritalstatus/age', target)
# data_transformed_lf2 = pre_process_label_flip(data_transformed_lf1, 'race/maritalstatus', target)
# data_transformed_lf3 = pre_process_label_flip(data_transformed_lf2, 'race/age', target)
# data_transformed_lf3['maritalstatus/age'] = data_transformed_lf3['maritalstatus/age'].astype(int)
# data_transformed_lf3['race/maritalstatus'] = data_transformed_lf3['race/maritalstatus'].astype(int)
# data_transformed_lf3['race/age'] = data_transformed_lf3['race/age'].astype(int)

#train
x_test_lf, y_test_lf, y_predicted_dict_lf, metrics_lf_dict = split_and_train(data_to_transform,
                                                                             protected_attributes_int)


In [None]:

# Fairness report for the LogisticRegression predictions on the label-flipped data.
# NOTE(review): the other mitigation cells report Catboost and the baseline reports
# DecisionTree — confirm the model choice here is intended.
df_test_transformed_lf = calc_fairness_report(x_test_lf, y_test_lf, y_predicted_dict_lf["LogisticRegression"], target,
                                              protected_attributes_int, display_disp=True)

# Compare disparities: intersectional baseline vs. label-flipped data.
plot_audit(df_test_int, protected_attributes_int)
plot_audit(df_test_transformed_lf, protected_attributes_int)

In [None]:
'''
In-processing: Fairlearn classifier
'''
def train_with_fairlearn(data, attribute, model, metrics_dict, threshold=0.5):
    """Train a FairlearnClassifier constrained on one sensitive attribute.

    The data is split 80/20 with a fixed seed, the sensitive attribute is
    removed from the features and passed separately to the classifier, and
    the scores from ``predict_proba`` are turned into 0/1 labels.

    Args:
        data: DataFrame with the feature columns and the global ``target`` column.
        attribute: Name of the sensitive attribute column.
        model: Estimator given as a dotted class path string.
        metrics_dict: Dict that receives the computed metrics under key
            ``model``; it is mutated and also returned.
        threshold: Score at or above which a prediction counts as class 1.

    Returns:
        Tuple ``(x_test, y_test, y_test_pred, metrics_dict)``; ``x_test`` has
        the sensitive attribute column inserted back.
    """
    y = data.loc[:, target]
    x = data.drop(target, axis=1)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=0)
    # The sensitive attribute is handed to the classifier separately from the features.
    s_train = x_train[attribute]
    x_train = x_train.drop(columns=[attribute])
    s_test = x_test[attribute]
    x_test = x_test.drop(columns=[attribute])

    fairlearn_clf = FairlearnClassifier(estimator=model, constraint="fairlearn.reductions.EqualizedOdds",reduction='fairlearn.reductions.ExponentiatedGradient')

    fairlearn_clf.fit(x_train, y_train, s_train)
    # Threshold the scores instead of casting them: `.astype(int)` truncates every
    # score below 1.0 to 0, which labels almost everything as the negative class.
    # Scores that are already hard 0/1 labels are unaffected by the threshold.
    y_train_pred = (fairlearn_clf.predict_proba(x_train, s_train) >= threshold).astype(int)
    y_test_pred = (fairlearn_clf.predict_proba(x_test, s_test) >= threshold).astype(int)
    # Insert the attribute back (second-to-last position) so metrics can group on it.
    x_test.insert(len(x_test.columns)-1, attribute, s_test, True)
    print("Model:\n", model)
    print(f"Accuracy score training:\n{accuracy_score(y_train, y_train_pred):.4f}")
    print(f"Accuracy score test:\n{accuracy_score(y_test, y_test_pred):.4f}")
    print(f"Classification report for model: {model} : \n {classification_report(y_test, y_test_pred)}")
    metric = calc_metrics(x_test=x_test, y_test=y_test, y_predicted=y_test_pred, attributes=[attribute], target=target)
    metrics_dict[model] = metric
    return x_test, y_test, y_test_pred, metrics_dict


data_to_transform = df_num_int.copy()
# Estimator given as a dotted class path string, the form passed on to FairlearnClassifier(estimator=...).
models = ["sklearn.tree.DecisionTreeClassifier"]
# NOTE(review): rebinding metrics_dict replaces the dict returned by the first
# split_and_train call on the original dataset — confirm nothing later needs it.
metrics_dict = {}
# Only "race/age" is used as the sensitive attribute for this run.
x_test_fl, y_test_fl, y_test_pred_fl, metrics_dict_fl =train_with_fairlearn(data_to_transform, "race/age", models[0], metrics_dict)

In [None]:
# FIXME(review): this cell held two incomplete statements (`from fairlearn.reductions.` and
# `from fairlearn.reductions._moments`) that raise SyntaxError and stop "Restart & Run All".
# Nothing in the visible notebook uses a name from them: FairlearnClassifier receives its
# constraint and reduction as dotted-path strings. If an import is needed, add the complete
# `from ... import ...` statement to the import cell at the top of the notebook.

In [None]:
# Fairness report for the Fairlearn-constrained classifier.
# NOTE(review): the classifier was constrained on "race/age" only, while the report and the
# audit plots below cover every attribute in protected_attributes_int.
df_test_transformed_fl =calc_fairness_report(x_test_fl, y_test_fl, y_test_pred_fl,target,protected_attributes_int, display_disp=True)

# Compare disparities: intersectional baseline vs. in-processing result.
plot_audit(df_test_int, protected_attributes_int)
plot_audit(df_test_transformed_fl, protected_attributes_int)


In [None]:
# Post-processing via the project's Fairlearn group-threshold helper, given all intersectional
# attributes; the returned frame is passed to plot_audit in the next cell.
df_test_fairlearn = post_process_group_threshold_fairlearn(df_num_int, protected_attributes_int, target)

In [None]:
# Compare disparities: intersectional baseline vs. Fairlearn group-threshold post-processing.
plot_audit(df_test_int, protected_attributes_int)
plot_audit(df_test_fairlearn, protected_attributes_int)

In [None]:
# Post-processing via the project's Aequitas group-threshold helper; unlike the Fairlearn
# variant, this call is given the single attribute 'race/age'.
df_test_aequitas = post_process_group_threshold_aequitas(df_num_int, 'race/age', target)


In [None]:
# Compare disparities: the baseline is audited on all intersectional attributes, while the
# Aequitas post-processed frame is audited on "race/age" only (the attribute it was run for).
plot_audit(df_test_int, protected_attributes_int)
plot_audit(df_test_aequitas, ["race/age"])

In [None]:
# RandomForest metrics for the 'race/age' intersection, one entry per dataset variant.
metrics_rf = {
    "Initial dataset": metrics_dict_int["RandomForest"]['race/age'],
    "Prevelance Sampling": metrics_ps_dict["RandomForest"]['race/age'],
    "Massaging": metrics_ms_dict["RandomForest"]['race/age'],
    "Data repairer": metrics_dr_dict["RandomForest"]['race/age'],
}
plot_metrics(metrics_rf, 'race/age')

In [None]:
# RandomForest metrics for the 'race/maritalstatus' intersection, one entry per dataset variant.
# NOTE(review): the 'race/age' comparison also includes "Data repairer"; it is absent here.
metrics_rf = {
    "Initial dataset": metrics_dict_int["RandomForest"]['race/maritalstatus'],
    "Prevelance Sampling": metrics_ps_dict["RandomForest"]['race/maritalstatus'],
    "Massaging": metrics_ms_dict["RandomForest"]['race/maritalstatus'],
}
plot_metrics(metrics_rf, 'race/maritalstatus')

In [None]:
# RandomForest metrics for the 'maritalstatus/age' intersection, one entry per dataset variant.
# NOTE(review): the 'race/age' comparison also includes "Data repairer"; it is absent here.
metrics_rf = {
    "Initial dataset": metrics_dict_int["RandomForest"]['maritalstatus/age'],
    "Prevelance Sampling": metrics_ps_dict["RandomForest"]['maritalstatus/age'],
    "Massaging": metrics_ms_dict["RandomForest"]['maritalstatus/age'],
}
plot_metrics(metrics_rf, 'maritalstatus/age')

In [None]:

# NOTE(review): `index` is not used anywhere in this cell; kept in case another cell reads it.
index = 4
# Accuracy per dataset variant and model. These values are hard-coded rather than computed
# in this cell (presumably copied from earlier classification reports — confirm), so they
# must be updated by hand if the models are retrained.
accuracy_scores = {}
accuracy_scores["Orignal dataset"] = {"Catboost": 0.79, "RandomForest":0.78, "DecisionTree":0.77, "LR":0.76}
accuracy_scores["Intersectional dataset"] = {"Catboost": 0.8, "RandomForest":0.78, "DecisionTree":0.76, "LR":0.76}
accuracy_scores["Massaging method"] = {"Catboost": 0.92, "RandomForest":0.87, "DecisionTree":0.86, "LR":0.83}
accuracy_scores["Prevelance Sampling"] = {"Catboost": 0.77, "RandomForest":0.75, "DecisionTree":0.73, "LR":0.73}
accuracy_scores["Data repairer"] = {"Catboost": 0.78, "RandomForest":0.76, "DecisionTree":0.75, "LR":0.76}

# Transpose so that rows are dataset variants and columns are models.
accuracy_df = pd.DataFrame(accuracy_scores).T
# Set the plot style before the figure is created so it takes effect.
sns.set(style="whitegrid")
sns.set_palette("blend:#7AB,#EDA")
# Create one figure/axes pair and draw into it. Previously plt.figure() opened an empty
# figure and DataFrame.plot() then opened a second one, leaving a blank figure in the output.
fig, ax = plt.subplots(figsize=(10, 6))
accuracy_df.plot(kind='bar', ax=ax)

# Add title and labels
ax.set_title("Accuracy Scores by Dataset and Model", fontsize=16)
ax.set_xlabel("Method", fontsize=12)
ax.set_ylabel("Accuracy Score", fontsize=12)
plt.xticks(rotation=45)
ax.legend(title="Model", bbox_to_anchor=(1.05, 1), loc='upper left')

# Show the plot
plt.tight_layout()
plt.show()