In [1]:
import pandas as pd
import numpy as np

import os
import sys
# Put the parent of the current working directory on the import path so the
# project-local imports that follow can resolve (presumably that is where the
# `facts` package lives). The membership check keeps re-runs of this cell from
# appending the same entry again.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

import facts
from facts.clean import clean_dataset
from facts import valid_ifthens_with_coverage_correctness, rules2rulesbyif
from facts.models import customLogisticRegression
from facts.parameters import ParameterProxy
from facts.formatting import recourse_report_reverse, print_recourse_report, print_recourse_report_cumulative, print_recourse_report_KStest_cumulative
from facts.utils import load_rules_by_if
from aif360.sklearn.datasets import fetch_compas

In [2]:
# Shared settings for the "New Metrics" cells further down.
# Passed as `cor_threshold` to the "min-above-corr" metric.
cor_thres = 0.5
# Passed as `cost_threshold` to the "max-upto-cost" metric.
cost_budget = 10
# Passed as `top_count` to the cumulative and KS-test rule selections
# (the "Old Metrics" cells hard-code their own top_count=50 instead).
top_count = 20
# Passed as `c_inf` to the "fairness-of-mean-recourse-cinf" metric.
c_inf = 5

In [3]:
# Protected attribute for this study. No later cell visible here reads this
# variable; the commented-out rule-generation code passes the literal "race".
sensitive_attribute = "race"

# Fetch COMPAS and carry the label along as a `target` column, so that
# clean_dataset receives features and label in a single frame.
X, y = fetch_compas()
X = X.assign(target=y.values)

df = clean_dataset(X, "compas")
df.head()

Unnamed: 0,sex,age_cat,race,juv_fel_count,juv_misd_count,juv_other_count,priors_count,c_charge_degree,target
0,Male,25 - 45,African-American,0,0,0,0,F,0
1,Male,Less than 25,African-American,0,0,1,4,F,0
2,Male,25 - 45,Caucasian,0,0,0,14,F,0
3,Female,25 - 45,Caucasian,0,0,0,0,M,1
4,Male,25 - 45,Caucasian,0,0,0,0,F,1


In [4]:
# Split the cleaned frame into the label and the feature matrix.
y = df["target"]
X = df.drop(columns="target")

# Partition the feature names by dtype through the public select_dtypes API
# instead of the private DataFrame._get_numeric_data helper.
# NOTE(review): unlike the private helper, include="number" does not pick up
# bool columns; the columns displayed by df.head() above contain none.
num_features = X.select_dtypes(include="number").columns.to_list()
cate_features = X.select_dtypes(include=["object", "category"]).columns.to_list()
# No feature is declared ordinal.
ord_features = []

In [5]:
# Load previously saved rule sets from disk. The file names match the ones
# written by the commented-out save_rules_by_if calls later in this notebook.
# NOTE(review): the on-disk format is not visible from here — if
# load_rules_by_if unpickles, only load files from a trusted source.
# Consumed by facts.select_rules_subset in the "Old Metrics" cells.
rules_with_atomic_correctness = load_rules_by_if("rulesCompas_atomic.data")
# Consumed by facts.select_rules_subset_cumulative and
# facts.select_rules_subset_KStest in the "New Metrics" cells.
rules_with_cumulative_correctness = load_rules_by_if("rulesCompas_cumulative.data")

# Old Metrics

In [6]:
# Per-feature weights handed to the feature-change builder. `sex` and `race`
# carry the largest weight (100), `age_cat` 10, every other feature 1.
feature_weights = {
    "sex": 100,
    "age_cat": 10,
    "race": 100,
    "juv_fel_count": 1,
    "juv_misd_count": 1,
    "juv_other_count": 1,
    "priors_count": 1,
    "c_charge_degree": 1,
}

# num_features / cate_features / ord_features are reused from the
# feature-typing cell above instead of being recomputed here (X has not
# changed since that cell ran).
# Categorical features go to the builder as `cate_cols`, numeric ones as
# `num_cols`.
features_with_binary_cost = cate_features
features_with_proportional_cost = num_features

comparators = facts.feature_change_builder(
    X,
    num_cols=features_with_proportional_cost,
    cate_cols=features_with_binary_cost,
    ord_cols=ord_features,
    feature_weights=feature_weights,
    num_normalization=True,
)
# `params` is the object every select_rules_subset* call below receives.
params = ParameterProxy(featureChanges=comparators)

## Weighted Average

In [7]:
# Select the top 50 subgroups under the "weighted-average" metric, ordered by
# the "abs-diff-decr" strategy. No filter is active; "remove-contained" and
# "remove-fair-rules" are the ones that were left switched off.
top_rules, subgroup_costs = facts.select_rules_subset(
    rules_with_atomic_correctness,
    params=params,
    metric="weighted-average",
    sort_strategy="abs-diff-decr",
    filter_sequence=[],
    top_count=50,
)

# Print the selected rules together with the per-subgroup aggregate cost.
print_recourse_report(
    top_rules, subgroup_costs=subgroup_costs, show_subgroup_costs=True
)

If [1mjuv_fel_count = 0, juv_other_count = 0, priors_count = 1, sex = Male[0m:
	Protected Subgroup '[1mAfrican-American[0m', [34m8.51%[39m covered
		Make [1m[31mpriors_count = 0[39m[0m with effectiveness [32m15.00%[39m.
		Make [1m[31mpriors_count = 2[39m[0m with effectiveness [32m0.00%[39m.
		Make [1m[31mpriors_count = 3[39m[0m with effectiveness [32m0.00%[39m.
		Make [1m[31mpriors_count = 4[39m[0m with effectiveness [32m0.00%[39m.
		[1mAggregate cost[0m of the above recourses = [35m-1.43[39m
	Protected Subgroup '[1mCaucasian[0m', [34m3.07%[39m covered
		Make [1m[31mpriors_count = 0[39m[0m with effectiveness [32m100.00%[39m.
		Make [1m[31mpriors_count = 2[39m[0m with effectiveness [32m0.00%[39m.
		Make [1m[31mpriors_count = 3[39m[0m with effectiveness [32m0.00%[39m.
		Make [1m[31mpriors_count = 4[39m[0m with effectiveness [32m0.00%[39m.
		[1mAggregate cost[0m of the above recourses = [35m-9.50[39m
	[35mBias against Afri

## Minimum Cost Above Threshold -- Equal Cost of Effectiveness (Macro)

In [8]:
# Select the top 50 subgroups under the "min-above-thr" metric with a 0.7
# correctness threshold. "remove-fair-rules" is an additional filter that was
# left switched off (it sat between "remove-below-thr" and
# "keep-only-min-change").
top_rules, subgroup_costs = facts.select_rules_subset(
    rules_with_atomic_correctness,
    params=params,
    metric="min-above-thr",
    cor_threshold=0.7,
    sort_strategy="generic-sorting-ignore-exists-subgroup-empty",
    filter_sequence=[
        "remove-contained",
        "remove-below-thr",
        "keep-only-min-change",
    ],
    top_count=50,
)

# Print the selected rules together with the per-subgroup aggregate cost.
print_recourse_report(
    top_rules, subgroup_costs=subgroup_costs, show_subgroup_costs=True
)

If [1mage_cat = Less than 25, juv_other_count = 1, priors_count = 0[0m:
	Protected Subgroup '[1mAfrican-American[0m', [34m2.13%[39m covered
		Make [1m[31mage_cat = 25 - 45[39m, [31mjuv_other_count = 0[39m[0m with effectiveness [32m100.00%[39m.
		Make [1m[31mage_cat = Greater than 45[39m, [31mjuv_other_count = 0[39m[0m with effectiveness [32m100.00%[39m.
		[1mAggregate cost[0m of the above recourses = [35m10.14[39m
	Protected Subgroup '[1mCaucasian[0m', [34m1.84%[39m covered
		Make [1m[31mjuv_other_count = 0[39m[0m with effectiveness [32m100.00%[39m.
		[1mAggregate cost[0m of the above recourses = [35m0.14[39m
	[35mBias against African-American. Unfairness score = 10.0.[39m
If [1mage_cat = Less than 25, c_charge_degree = M, juv_fel_count = 0, juv_other_count = 1[0m:
	Protected Subgroup '[1mAfrican-American[0m', [34m1.06%[39m covered
		Make [1m[31mage_cat = 25 - 45[39m, [31mjuv_other_count = 0[39m[0m with effectiveness [32m100.00%[3

## Number of Rules Above Threshold -- Equal Choice for Recourse

In [9]:
# Select the top 50 subgroups under the "num-above-thr" metric with a 0.7
# correctness threshold. "remove-fair-rules" is an additional filter that was
# left switched off (it came last in the sequence).
top_rules, subgroup_costs = facts.select_rules_subset(
    rules_with_atomic_correctness,
    params=params,
    metric="num-above-thr",
    cor_threshold=0.7,
    sort_strategy="generic-sorting-ignore-forall-subgroups-empty",
    filter_sequence=["remove-contained", "remove-below-thr"],
    top_count=50,
)

# Print the selected rules together with the per-subgroup aggregate cost.
print_recourse_report(
    top_rules, subgroup_costs=subgroup_costs, show_subgroup_costs=True
)

If [1mage_cat = 25 - 45, c_charge_degree = M, priors_count = 9[0m:
	Protected Subgroup '[1mAfrican-American[0m', [34m1.06%[39m covered
		Make [1m[31mpriors_count = 0[39m[0m with effectiveness [32m100.00%[39m.
		Make [1m[31mage_cat = Greater than 45[39m, [31mpriors_count = 0[39m[0m with effectiveness [32m100.00%[39m.
		Make [1m[31mage_cat = Greater than 45[39m, [31mc_charge_degree = F[39m, [31mpriors_count = 0[39m[0m with effectiveness [32m100.00%[39m.
		Make [1m[31mage_cat = Greater than 45[39m, [31mc_charge_degree = F[39m, [31mpriors_count = 1[39m[0m with effectiveness [32m100.00%[39m.
		Make [1m[31mage_cat = Greater than 45[39m, [31mpriors_count = 1[39m[0m with effectiveness [32m100.00%[39m.
		Make [1m[31mage_cat = Greater than 45[39m, [31mc_charge_degree = F[39m, [31mpriors_count = 2[39m[0m with effectiveness [32m100.00%[39m.
		Make [1m[31mage_cat = Greater than 45[39m, [31mc_charge_degree = F[39m, [31mpriors_count = 3

In [10]:
# Provenance, kept for reference only (nothing in this cell executes): the
# lines below show how the two rule files loaded near the top of this notebook
# were written.
# NOTE(review): rules_by_if, X_test and model are not defined anywhere in the
# visible part of this notebook — confirm where they come from before
# uncommenting.
# rules_with_cumulative = facts.cum_corr_costs_all(rules_by_if, X_test, model, sensitive_attribute="race", params=params)
# from facts.utils import load_rules_by_if, save_rules_by_if
# save_rules_by_if("rulesCompas_atomic.data", rules_by_if)
# save_rules_by_if("rulesCompas_cumulative.data", rules_with_cumulative)

# New Metrics

## Fairness of Mean Recourse Cost ($c_\infty$ implementation) -- Equal Mean Recourse

In [11]:
# Select the top `top_count` subgroups under the
# "fairness-of-mean-recourse-cinf" metric, using the configured c_inf.
# No filter is active; "remove-contained" and "remove-fair-rules" are the ones
# that were left switched off.
top_rules, subgroup_costs = facts.select_rules_subset_cumulative(
    rules_with_cumulative_correctness,
    params=params,
    metric="fairness-of-mean-recourse-cinf",
    c_inf=c_inf,
    sort_strategy="generic-sorting",
    filter_sequence=[],
    top_count=top_count,
)

# Print the selected rules with per-action and per-subgroup costs.
print_recourse_report_cumulative(
    top_rules,
    subgroup_costs=subgroup_costs,
    show_then_costs=True,
    show_subgroup_costs=True,
)

If [1mage_cat = 25 - 45, c_charge_degree = F, juv_fel_count = 0, juv_other_count = 0, sex = Female[0m:
	Protected Subgroup '[1mAfrican-American[0m', [34m1.91%[39m covered
		Make [1m[31mc_charge_degree = M[39m[0m with effectiveness [32m11.11%[39m and counterfactual cost = 1.0.
		Make [1m[31mage_cat = Less than 25[39m[0m with effectiveness [32m11.11%[39m and counterfactual cost = 10.0.
		Make [1m[31mage_cat = Greater than 45[39m[0m with effectiveness [32m33.33%[39m and counterfactual cost = 10.0.
		Make [1m[31mage_cat = Less than 25[39m, [31mc_charge_degree = M[39m[0m with effectiveness [32m33.33%[39m and counterfactual cost = 11.0.
		Make [1m[31mage_cat = Greater than 45[39m, [31mc_charge_degree = M[39m[0m with effectiveness [32m33.33%[39m and counterfactual cost = 11.0.
		[1mAggregate cost[0m of the above recourses = [35m39.00[39m
	Protected Subgroup '[1mCaucasian[0m', [34m3.07%[39m covered
		Make [1m[31mc_charge_degree = M[39m[0m wit

## Fairness of Mean Recourse Cost (conditional expectation implementation) -- Equal Conditional Mean Recourse

In [12]:
# Select the top `top_count` subgroups under the
# "fairness-of-mean-recourse-conditional" metric.
# No filter is active; "remove-contained" and "remove-fair-rules" are the ones
# that were left switched off.
top_rules, subgroup_costs = facts.select_rules_subset_cumulative(
    rules_with_cumulative_correctness,
    params=params,
    metric="fairness-of-mean-recourse-conditional",
    sort_strategy="generic-sorting-ignore-exists-subgroup-empty",
    filter_sequence=[],
    top_count=top_count,
)

# Print the selected rules with per-action and per-subgroup costs.
print_recourse_report_cumulative(
    top_rules,
    subgroup_costs=subgroup_costs,
    show_then_costs=True,
    show_subgroup_costs=True,
)

If [1mage_cat = Less than 25, juv_fel_count = 0, juv_misd_count = 0, juv_other_count = 1, priors_count = 0[0m:
	Protected Subgroup '[1mAfrican-American[0m', [34m2.13%[39m covered
		Make [1m[31mjuv_other_count = 0[39m[0m with effectiveness [32m10.00%[39m and counterfactual cost = 0.14.
		Make [1m[31mjuv_other_count = 0[39m, [31mpriors_count = 1[39m[0m with effectiveness [32m10.00%[39m and counterfactual cost = 0.17.
		Make [1m[31mage_cat = 25 - 45[39m, [31mjuv_other_count = 0[39m[0m with effectiveness [32m100.00%[39m and counterfactual cost = 10.14.
		Make [1m[31mage_cat = Greater than 45[39m, [31mjuv_other_count = 0[39m[0m with effectiveness [32m100.00%[39m and counterfactual cost = 10.14.
		Make [1m[31mage_cat = 25 - 45[39m, [31mjuv_other_count = 0[39m, [31mpriors_count = 1[39m[0m with effectiveness [32m100.00%[39m and counterfactual cost = 10.17.
		Make [1m[31mage_cat = Greater than 45[39m, [31mjuv_other_count = 0[39m, [31mpriors_co

## Fairness of Recourse at Effectiveness Level -- Equal Cost of Effectiveness (Micro)

In [13]:
# Select the top `top_count` subgroups under the "min-above-corr" metric at
# the configured correctness threshold (cor_thres).
# No filter is active; the ones that were left switched off are
# "remove-contained", "remove-fair-rules", "keep-cheap-rules-above-thr-cor"
# and "keep-only-min-change".
top_rules, subgroup_costs = facts.select_rules_subset_cumulative(
    rules_with_cumulative_correctness,
    params=params,
    metric="min-above-corr",
    cor_threshold=cor_thres,
    sort_strategy="generic-sorting-ignore-exists-subgroup-empty",
    filter_sequence=[],
    top_count=top_count,
)

# Print the selected rules with per-action and per-subgroup costs.
print_recourse_report_cumulative(
    top_rules,
    subgroup_costs=subgroup_costs,
    show_then_costs=True,
    show_subgroup_costs=True,
)

If [1mage_cat = Less than 25, juv_misd_count = 0, juv_other_count = 1, priors_count = 1[0m:
	Protected Subgroup '[1mAfrican-American[0m', [34m2.34%[39m covered
		Make [1m[31mjuv_other_count = 0[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 0.14.
		Make [1m[31mjuv_other_count = 0[39m, [31mpriors_count = 0[39m[0m with effectiveness [32m18.18%[39m and counterfactual cost = 0.17.
		Make [1m[31mage_cat = 25 - 45[39m, [31mjuv_other_count = 0[39m[0m with effectiveness [32m100.00%[39m and counterfactual cost = 10.14.
		Make [1m[31mage_cat = Greater than 45[39m, [31mjuv_other_count = 0[39m[0m with effectiveness [32m100.00%[39m and counterfactual cost = 10.14.
		Make [1m[31mage_cat = 25 - 45[39m, [31mjuv_other_count = 0[39m, [31mpriors_count = 0[39m[0m with effectiveness [32m100.00%[39m and counterfactual cost = 10.17.
		Make [1m[31mage_cat = Greater than 45[39m, [31mjuv_other_count = 0[39m, [31mpriors_count = 0[39m[0m wit

## Fairness of Effectiveness -- Equal Effectiveness

In [14]:
# Select the top `top_count` subgroups under the "total-correctness" metric.
# No filter is active; the ones that were left switched off are
# "remove-contained", "remove-fair-rules" and "keep-only-min-change".
top_rules, subgroup_costs = facts.select_rules_subset_cumulative(
    rules_with_cumulative_correctness,
    params=params,
    metric="total-correctness",
    sort_strategy="generic-sorting-ignore-forall-subgroups-empty",
    filter_sequence=[],
    top_count=top_count,
)

# Print the selected rules with per-action and per-subgroup costs; the report
# is additionally asked for its correctness_metric variant.
print_recourse_report_cumulative(
    top_rules,
    subgroup_costs=subgroup_costs,
    correctness_metric=True,
    show_then_costs=True,
    show_subgroup_costs=True,
)

If [1mjuv_other_count = 1, priors_count = 0[0m:
	Protected Subgroup '[1mAfrican-American[0m', [34m2.13%[39m covered
		Make [1m[31mjuv_other_count = 0[39m[0m with effectiveness [32m10.00%[39m and counterfactual cost = 0.14.
		Make [1m[31mjuv_other_count = 0[39m, [31mpriors_count = 1[39m[0m with effectiveness [32m10.00%[39m and counterfactual cost = 0.17.
		Make [1m[31mjuv_other_count = 0[39m, [31mpriors_count = 2[39m[0m with effectiveness [32m10.00%[39m and counterfactual cost = 0.2.
		Make [1m[31mjuv_other_count = 0[39m, [31mpriors_count = 3[39m[0m with effectiveness [32m10.00%[39m and counterfactual cost = 0.22.
		Make [1m[31mjuv_other_count = 0[39m, [31mpriors_count = 4[39m[0m with effectiveness [32m10.00%[39m and counterfactual cost = 0.25.
		Make [1m[31mjuv_other_count = 0[39m, [31mpriors_count = 5[39m[0m with effectiveness [32m10.00%[39m and counterfactual cost = 0.27.
		[1mAggregate cost[0m of the above recourses = [35m0.10[

## Fairness of Recourse (using two-sample Kolmogorov-Smirnov test) -- Fair Effectiveness-Cost Trade-Off

In [15]:
# Population size per protected group, passed to both the KS-test selection
# and its report. The numbers are hard-coded copies taken from the other
# notebooks; nothing in this notebook recomputes them.
affected_pop_sizes = {
    "African-American": 483,
    "Caucasian": 262,
}

top_rules, unfairness = facts.select_rules_subset_KStest(
    rules_with_cumulative_correctness,
    affected_pop_sizes,
    top_count=top_count,
)

# show_cumulative_plots=True is a further report option that was left
# switched off.
print_recourse_report_KStest_cumulative(
    top_rules,
    population_sizes=affected_pop_sizes,
    show_then_costs=True,
    unfairness=unfairness,
)

If [1mc_charge_degree = F, juv_other_count = 0, priors_count = 1, sex = Male[0m:
	Protected Subgroup '[1mAfrican-American[0m', [34m7.45%[39m covered out of 483
		Make [1m[31mpriors_count = 0[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 0.03.
		Make [1m[31mpriors_count = 2[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 0.03.
		Make [1m[31mpriors_count = 3[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 0.05.
		Make [1m[31mc_charge_degree = M[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31mc_charge_degree = M[39m, [31mpriors_count = 2[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.03.
		Make [1m[31mc_charge_degree = M[39m, [31mpriors_count = 0[39m[0m with effectiveness [32m100.00%[39m and counterfactual cost = 1.03.
		Make [1m[31mc_charge_degree = M[39m, [31mpriors_count = 3[39m[0m with effectiveness [32m100.00%[39m and 

## Fairness of Effectiveness at Recourse Budget -- Equal Effectiveness within Budget (Micro)

In [16]:
# Select the top `top_count` subgroups under the "max-upto-cost" metric,
# with the configured cost_budget as the cost threshold.
# No filter is active; the ones that were left switched off are
# "remove-contained", "remove-fair-rules", "remove-above-thr-cost",
# "remove-below-thr" and "keep-only-min-change".
top_rules, subgroup_costs = facts.select_rules_subset_cumulative(
    rules_with_cumulative_correctness,
    params=params,
    metric="max-upto-cost",
    cost_threshold=cost_budget,
    sort_strategy="generic-sorting-ignore-exists-subgroup-empty",
    filter_sequence=[],
    top_count=top_count,
)

# Print the selected rules with per-action and per-subgroup costs; the report
# is additionally asked for its correctness_metric variant.
print_recourse_report_cumulative(
    top_rules,
    subgroup_costs=subgroup_costs,
    correctness_metric=True,
    show_then_costs=True,
    show_subgroup_costs=True,
)

If [1mjuv_other_count = 1, priors_count = 0[0m:
	Protected Subgroup '[1mAfrican-American[0m', [34m2.13%[39m covered
		Make [1m[31mjuv_other_count = 0[39m[0m with effectiveness [32m10.00%[39m and counterfactual cost = 0.14.
		Make [1m[31mjuv_other_count = 0[39m, [31mpriors_count = 1[39m[0m with effectiveness [32m10.00%[39m and counterfactual cost = 0.17.
		Make [1m[31mjuv_other_count = 0[39m, [31mpriors_count = 2[39m[0m with effectiveness [32m10.00%[39m and counterfactual cost = 0.2.
		Make [1m[31mjuv_other_count = 0[39m, [31mpriors_count = 3[39m[0m with effectiveness [32m10.00%[39m and counterfactual cost = 0.22.
		Make [1m[31mjuv_other_count = 0[39m, [31mpriors_count = 4[39m[0m with effectiveness [32m10.00%[39m and counterfactual cost = 0.25.
		Make [1m[31mjuv_other_count = 0[39m, [31mpriors_count = 5[39m[0m with effectiveness [32m10.00%[39m and counterfactual cost = 0.27.
		[1mAggregate cost[0m of the above recourses = [35m0.10[