In [1]:
import pandas as pd
import numpy as np

import os
import sys
# Put the parent of the current working directory on the import path
# (presumably so the `facts` package imported below resolves without being installed).
module_path = os.path.abspath("..")
if module_path not in sys.path:
    # No trailing line-continuation here: a dangling backslash would glue this
    # statement to whatever line follows it.
    sys.path.append(module_path)

import facts
from facts.clean import clean_dataset
from facts import valid_ifthens_with_coverage_correctness, rules2rulesbyif
from facts.models import customLogisticRegression
from facts.parameters import ParameterProxy
from facts.formatting import recourse_report_reverse, print_recourse_report, print_recourse_report_cumulative, print_recourse_report_KStest_cumulative
from facts.utils import load_rules_by_if
from aif360.sklearn.datasets import fetch_compas

In [2]:
# Tunable settings shared by the metric cells further down.
top_count = 20     # passed as `top_count` to the cumulative and KS-test selections
c_inf = 5          # passed as `c_inf` to the "fairness-of-mean-recourse-cinf" selection
cor_thres = 0.5    # passed as `cor_threshold` to the "min-above-corr" selection
cost_budget = 10   # passed as `cost_threshold` to the "max-upto-cost" selection

In [3]:
# Location of the cleaned Chicago SSL CSV that this notebook analyses.
urlfile = "https://raw.githubusercontent.com/samuel-yeom/fliptest/master/exact-ot/chicago-ssl-clean.csv"

# Download the raw table and run the project's cleaning routine for the 'SSL' dataset.
X = pd.read_csv(urlfile)

# Protected attribute of this analysis. Every report below splits the population
# into the 'WHI' / 'BLK' subgroups, and "SEX CODE CD" shows up as an ordinary
# feature inside the rules, so the sensitive column is race, not sex.
sensitive_attribute = "RACE CODE CD"

df = clean_dataset(X, 'SSL')
df.head()

Unnamed: 0,SSL SCORE,PREDICTOR RAT AGE AT LATEST ARREST,PREDICTOR RAT VICTIM SHOOTING INCIDENTS,PREDICTOR RAT VICTIM BATTERY OR ASSAULT,PREDICTOR RAT ARRESTS VIOLENT OFFENSES,PREDICTOR RAT GANG AFFILIATION,PREDICTOR RAT NARCOTIC ARRESTS,PREDICTOR RAT TREND IN CRIMINAL ACTIVITY,PREDICTOR RAT UUW ARRESTS,SEX CODE CD,RACE CODE CD
0,0,20-30,0,1,4,1,1,"(0.3, 7.3]",1,M,BLK
1,0,20-30,0,9,1,1,0,"(0.3, 7.3]",0,M,WHI
2,0,10-20,1,2,1,1,3,"(0.3, 7.3]",0,M,WHI
3,0,10-20,1,1,4,1,0,"(0.3, 7.3]",1,M,BLK
4,0,10-20,0,0,5,0,0,"(0.3, 7.3]",3,M,BLK


In [4]:
# Separate the target column from the feature matrix.
y = df['SSL SCORE']
X = df.drop('SSL SCORE', axis=1)

# Partition the feature names by dtype using the public `select_dtypes` API
# instead of the private `DataFrame._get_numeric_data()` helper. Boolean columns
# are listed explicitly because "number" alone does not match them.
num_features = X.select_dtypes(include=['number', 'bool']).columns.to_list()
cate_features = X.select_dtypes(include=['object', 'category']).columns.to_list()
# No feature is treated as ordinal in this notebook.
ord_features = []

In [5]:
# Load the two precomputed rule collections used by the metric cells below:
# the "atomic" one feeds `facts.select_rules_subset`, the "cumulative" one feeds
# `facts.select_rules_subset_cumulative` and `facts.select_rules_subset_KStest`.
atomic_rules_file = "rulesSSL_atomic.data"
cumulative_rules_file = "rulesSSL_cumulative.data"

rules_with_atomic_correctness = load_rules_by_if(atomic_rules_file)
rules_with_cumulative_correctness = load_rules_by_if(cumulative_rules_file)

# Old Metrics

In [6]:
# Per-feature weights handed to `facts.feature_change_builder`. The two
# demographic columns carry a weight of 100, age 10, every other predictor 1.
feature_weights = {
    'PREDICTOR RAT AGE AT LATEST ARREST': 10,
    'PREDICTOR RAT VICTIM SHOOTING INCIDENTS': 1,
    'PREDICTOR RAT VICTIM BATTERY OR ASSAULT': 1,
    'PREDICTOR RAT ARRESTS VIOLENT OFFENSES': 1,
    'PREDICTOR RAT GANG AFFILIATION': 1,
    'PREDICTOR RAT NARCOTIC ARRESTS': 1,
    'PREDICTOR RAT TREND IN CRIMINAL ACTIVITY': 1,
    'PREDICTOR RAT UUW ARRESTS': 1,
    'SEX CODE CD': 100,
    'RACE CODE CD': 100,
}

# Categorical columns are passed as `cate_cols`, numeric ones as `num_cols`.
features_with_binary_cost = cate_features
features_with_proportional_cost = num_features

# Build the per-feature change comparators and wrap them in the parameter
# object that every rule-selection call below receives as `params`.
comparators = facts.feature_change_builder(
    X,
    num_cols=features_with_proportional_cost,
    cate_cols=features_with_binary_cost,
    ord_cols=ord_features,
    feature_weights=feature_weights,
    num_normalization=True,
)
params = ParameterProxy(featureChanges=comparators)

## Weighted Average

In [7]:
# Select from the atomic-correctness rules with the "weighted-average" metric,
# sorted by "abs-diff-decr", with `top_count=50`. No filters are applied:
# "remove-contained" and "remove-fair-rules" stay disabled for this metric.
weighted_average_filters = []

top_rules, subgroup_costs = facts.select_rules_subset(
    rules_with_atomic_correctness,
    metric="weighted-average",
    sort_strategy="abs-diff-decr",
    top_count=50,
    filter_sequence=weighted_average_filters,
    params=params,
)

# Print the selection, including the per-subgroup costs.
print_recourse_report(top_rules, subgroup_costs=subgroup_costs, show_subgroup_costs=True)

If [1mPREDICTOR RAT ARRESTS VIOLENT OFFENSES = 0, PREDICTOR RAT NARCOTIC ARRESTS = 3, PREDICTOR RAT VICTIM BATTERY OR ASSAULT = 0, PREDICTOR RAT VICTIM SHOOTING INCIDENTS = 0[0m:
	Protected Subgroup '[1mWHI[0m', [34m1.17%[39m covered
		Make [1m[31mPREDICTOR RAT NARCOTIC ARRESTS = 0[39m[0m with effectiveness [32m48.15%[39m.
		Make [1m[31mPREDICTOR RAT NARCOTIC ARRESTS = 1[39m[0m with effectiveness [32m25.93%[39m.
		Make [1m[31mPREDICTOR RAT ARRESTS VIOLENT OFFENSES = 1[39m, [31mPREDICTOR RAT NARCOTIC ARRESTS = 0[39m[0m with effectiveness [32m0.00%[39m.
		Make [1m[31mPREDICTOR RAT NARCOTIC ARRESTS = 2[39m[0m with effectiveness [32m14.81%[39m.
		[1mAggregate cost[0m of the above recourses = [35m-3.18[39m
	Protected Subgroup '[1mBLK[0m', [34m1.30%[39m covered
		Make [1m[31mPREDICTOR RAT NARCOTIC ARRESTS = 0[39m[0m with effectiveness [32m67.03%[39m.
		Make [1m[31mPREDICTOR RAT NARCOTIC ARRESTS = 1[39m[0m with effectiveness [32m52.75%[39m.


## Minimum Cost Above Threshold -- Equal Cost of Effectiveness (Macro)

In [8]:
# Select from the atomic-correctness rules with the "min-above-thr" metric at a
# correctness threshold of 0.7. "remove-fair-rules" is left disabled.
min_above_thr_filters = [
    "remove-contained",
    "remove-below-thr",
    "keep-only-min-change",
]

min_above_thr_options = dict(
    metric="min-above-thr",
    sort_strategy="generic-sorting-ignore-exists-subgroup-empty",
    top_count=50,
    cor_threshold=0.7,
    filter_sequence=min_above_thr_filters,
    params=params,
)
top_rules, subgroup_costs = facts.select_rules_subset(
    rules_with_atomic_correctness, **min_above_thr_options
)

# Print the selection, including the per-subgroup costs.
print_recourse_report(top_rules, subgroup_costs=subgroup_costs, show_subgroup_costs=True)

If [1mPREDICTOR RAT ARRESTS VIOLENT OFFENSES = 0, PREDICTOR RAT NARCOTIC ARRESTS = 1, PREDICTOR RAT VICTIM SHOOTING INCIDENTS = 0, SEX CODE CD = F[0m:
	Protected Subgroup '[1mWHI[0m', [34m3.21%[39m covered
		Make [1m[31mPREDICTOR RAT NARCOTIC ARRESTS = 0[39m[0m with effectiveness [32m72.97%[39m.
		[1mAggregate cost[0m of the above recourses = [35m0.03[39m
	Protected Subgroup '[1mBLK[0m', [34m1.93%[39m covered
		Make [1m[31mPREDICTOR RAT NARCOTIC ARRESTS = 0[39m, [31mSEX CODE CD = M[39m[0m with effectiveness [32m70.37%[39m.
		[1mAggregate cost[0m of the above recourses = [35m100.03[39m
	[35mBias against BLK. Unfairness score = 100.0.[39m
If [1mPREDICTOR RAT ARRESTS VIOLENT OFFENSES = 0, PREDICTOR RAT NARCOTIC ARRESTS = 1, PREDICTOR RAT UUW ARRESTS = 0, PREDICTOR RAT VICTIM SHOOTING INCIDENTS = 0, SEX CODE CD = F[0m:
	Protected Subgroup '[1mWHI[0m', [34m3.21%[39m covered
		Make [1m[31mPREDICTOR RAT NARCOTIC ARRESTS = 0[39m[0m with effectiveness

## Number of Rules Above Threshold -- Equal Choice for Recourse

In [9]:
# Select from the atomic-correctness rules with the "num-above-thr" metric at a
# correctness threshold of 0.7. "remove-fair-rules" is left disabled.
num_above_thr_filters = ["remove-contained", "remove-below-thr"]

top_rules, subgroup_costs = facts.select_rules_subset(
    rules_with_atomic_correctness,
    metric="num-above-thr",
    sort_strategy="generic-sorting-ignore-forall-subgroups-empty",
    top_count=50,
    cor_threshold=0.7,
    filter_sequence=num_above_thr_filters,
    params=params,
)

# Print the selection, including the per-subgroup costs.
print_recourse_report(top_rules, subgroup_costs=subgroup_costs, show_subgroup_costs=True)

If [1mPREDICTOR RAT GANG AFFILIATION = 0, PREDICTOR RAT NARCOTIC ARRESTS = 0, PREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (0.3, 7.3], SEX CODE CD = M[0m:
	Protected Subgroup '[1mWHI[0m', [34m11.88%[39m covered
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-8.200999999999999, -0.3][39m[0m with effectiveness [32m88.32%[39m.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-0.1, 0.1][39m[0m with effectiveness [32m79.20%[39m.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (0.1, 0.3][39m[0m with effectiveness [32m79.20%[39m.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-0.2, -0.1][39m[0m with effectiveness [32m79.20%[39m.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-8.200999999999999, -0.3][39m, [31mSEX CODE CD = F[39m[0m with effectiveness [32m88.32%[39m.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-0.1, 0.1][39m, [31mSEX CODE CD = F[39m[0m with effectiveness [32m79.20%[39m.

# New Metrics

In [10]:
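# Disabled: one-off recomputation of the cumulative rules. This notebook loads them from
# "rulesSSL_cumulative.data" instead.
# NOTE(review): `rules_by_if`, `X_test` and `model` are not defined in the visible cells, and
# sensitive_attribute="race" is not one of this dataset's column names -- adapt before re-enabling.
# rules_with_cumulative = facts.cum_corr_costs_all(rules_by_if, X_test, model, sensitive_attribute="race", params=params)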

In [11]:
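# Disabled: would persist `rules_with_cumulative`, which is only assigned in commented-out code.
# NOTE(review): the target name "rulesCompas_cumulative.data" differs from the
# "rulesSSL_cumulative.data" file this notebook loads -- confirm the file name before re-enabling.
# from facts.utils import load_rules_by_if, save_rules_by_if
# save_rules_by_if("rulesCompas_cumulative.data", rules_with_cumulative)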

## Fairness of Mean Recourse Cost ($c_\infty$ implementation) -- Equal Mean Recourse

In [12]:
# Select from the cumulative rules with the "fairness-of-mean-recourse-cinf"
# metric, using the configured `c_inf` and `top_count`. No filters are applied:
# "remove-contained" and "remove-fair-rules" stay disabled.
cinf_selection_options = dict(
    metric="fairness-of-mean-recourse-cinf",
    c_inf=c_inf,
    sort_strategy="generic-sorting",
    top_count=top_count,
    filter_sequence=[],
    params=params,
)
top_rules, subgroup_costs = facts.select_rules_subset_cumulative(
    rules_with_cumulative_correctness, **cinf_selection_options
)

# Print the selection with per-subgroup costs and the cost of each suggested action.
print_recourse_report_cumulative(
    top_rules,
    subgroup_costs=subgroup_costs,
    show_subgroup_costs=True,
    show_then_costs=True,
)

If [1mPREDICTOR RAT GANG AFFILIATION = 0, PREDICTOR RAT NARCOTIC ARRESTS = 0, PREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-0.2, -0.1], PREDICTOR RAT UUW ARRESTS = 0, PREDICTOR RAT VICTIM BATTERY OR ASSAULT = 0, SEX CODE CD = M[0m:
	Protected Subgroup '[1mWHI[0m', [34m1.91%[39m covered
		Make [1m[31mPREDICTOR RAT NARCOTIC ARRESTS = 1[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 0.03.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-0.1, 0.1][39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (0.1, 0.3][39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (0.3, 7.3][39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-0.3, -0.2][39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0

## Fairness of Mean Recourse Cost (conditional expectation implementation) -- Equal Conditional Mean Recourse

In [13]:
# Select from the cumulative rules with the "fairness-of-mean-recourse-conditional"
# metric and the configured `top_count`. No filters are applied:
# "remove-contained" and "remove-fair-rules" stay disabled.
conditional_filters = []

top_rules, subgroup_costs = facts.select_rules_subset_cumulative(
    rules_with_cumulative_correctness,
    metric="fairness-of-mean-recourse-conditional",
    sort_strategy="generic-sorting-ignore-exists-subgroup-empty",
    top_count=top_count,
    filter_sequence=conditional_filters,
    params=params,
)

# Print the selection with per-subgroup costs and the cost of each suggested action.
conditional_report_options = dict(show_subgroup_costs=True, show_then_costs=True)
print_recourse_report_cumulative(
    top_rules, subgroup_costs=subgroup_costs, **conditional_report_options
)

If [1mPREDICTOR RAT ARRESTS VIOLENT OFFENSES = 0, PREDICTOR RAT GANG AFFILIATION = 0, PREDICTOR RAT NARCOTIC ARRESTS = 1, PREDICTOR RAT VICTIM BATTERY OR ASSAULT = 0, PREDICTOR RAT VICTIM SHOOTING INCIDENTS = 0, SEX CODE CD = F[0m:
	Protected Subgroup '[1mWHI[0m', [34m2.69%[39m covered
		Make [1m[31mPREDICTOR RAT NARCOTIC ARRESTS = 0[39m[0m with effectiveness [32m83.87%[39m and counterfactual cost = 0.03.
		Make [1m[31mSEX CODE CD = M[39m[0m with effectiveness [32m83.87%[39m and counterfactual cost = 100.0.
		Make [1m[31mPREDICTOR RAT NARCOTIC ARRESTS = 2[39m, [31mSEX CODE CD = M[39m[0m with effectiveness [32m83.87%[39m and counterfactual cost = 100.03.
		Make [1m[31mPREDICTOR RAT NARCOTIC ARRESTS = 0[39m, [31mSEX CODE CD = M[39m[0m with effectiveness [32m83.87%[39m and counterfactual cost = 100.03.
		Make [1m[31mPREDICTOR RAT ARRESTS VIOLENT OFFENSES = 1[39m, [31mPREDICTOR RAT NARCOTIC ARRESTS = 0[39m, [31mSEX CODE CD = M[39m[0m with effective

## Fairness of Recourse at Effectiveness Level -- Equal Cost of Effectiveness (Micro)

In [14]:
# Select from the cumulative rules with the "min-above-corr" metric, using the
# configured correctness threshold (`cor_thres`) and `top_count`.
# No filters are applied; the disabled candidates are "remove-contained",
# "remove-fair-rules", "keep-cheap-rules-above-thr-cor" and "keep-only-min-change".
min_above_corr_options = dict(
    metric="min-above-corr",
    sort_strategy="generic-sorting-ignore-exists-subgroup-empty",
    top_count=top_count,
    cor_threshold=cor_thres,
    filter_sequence=[],
    params=params,
)
top_rules, subgroup_costs = facts.select_rules_subset_cumulative(
    rules_with_cumulative_correctness, **min_above_corr_options
)

# Print the selection with per-subgroup costs and the cost of each suggested action.
print_recourse_report_cumulative(
    top_rules,
    subgroup_costs=subgroup_costs,
    show_subgroup_costs=True,
    show_then_costs=True,
)

If [1mPREDICTOR RAT NARCOTIC ARRESTS = 1, SEX CODE CD = F[0m:
	Protected Subgroup '[1mWHI[0m', [34m3.38%[39m covered
		Make [1m[31mPREDICTOR RAT NARCOTIC ARRESTS = 0[39m[0m with effectiveness [32m70.51%[39m and counterfactual cost = 0.03.
		Make [1m[31mSEX CODE CD = M[39m[0m with effectiveness [32m70.51%[39m and counterfactual cost = 100.0.
		Make [1m[31mPREDICTOR RAT NARCOTIC ARRESTS = 2[39m, [31mSEX CODE CD = M[39m[0m with effectiveness [32m70.51%[39m and counterfactual cost = 100.03.
		Make [1m[31mPREDICTOR RAT NARCOTIC ARRESTS = 0[39m, [31mSEX CODE CD = M[39m[0m with effectiveness [32m71.79%[39m and counterfactual cost = 100.03.
		Make [1m[31mPREDICTOR RAT NARCOTIC ARRESTS = 3[39m, [31mSEX CODE CD = M[39m[0m with effectiveness [32m71.79%[39m and counterfactual cost = 100.07.
		[1mAggregate cost[0m of the above recourses = [35m0.03[39m
	Protected Subgroup '[1mBLK[0m', [34m2.50%[39m covered
		Make [1m[31mPREDICTOR RAT NARCOTIC ARRES

## Fairness of Effectiveness -- Equal Effectiveness

In [15]:
# Select from the cumulative rules with the "total-correctness" metric and the
# configured `top_count`. No filters are applied; the disabled candidates are
# "remove-contained", "remove-fair-rules" and "keep-only-min-change".
total_correctness_filters = []

top_rules, subgroup_costs = facts.select_rules_subset_cumulative(
    rules_with_cumulative_correctness,
    metric="total-correctness",
    sort_strategy="generic-sorting-ignore-forall-subgroups-empty",
    top_count=top_count,
    filter_sequence=total_correctness_filters,
    params=params,
)

# Print the selection; `correctness_metric=True` is passed because the metric
# here is a correctness value rather than a cost (NOTE(review): confirm against
# `print_recourse_report_cumulative`).
total_correctness_report_options = dict(
    show_subgroup_costs=True,
    show_then_costs=True,
    correctness_metric=True,
)
print_recourse_report_cumulative(
    top_rules, subgroup_costs=subgroup_costs, **total_correctness_report_options
)

If [1mPREDICTOR RAT GANG AFFILIATION = 0, PREDICTOR RAT NARCOTIC ARRESTS = 0, PREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-0.2, -0.1], PREDICTOR RAT UUW ARRESTS = 0, PREDICTOR RAT VICTIM BATTERY OR ASSAULT = 0[0m:
	Protected Subgroup '[1mWHI[0m', [34m2.21%[39m covered
		Make [1m[31mPREDICTOR RAT NARCOTIC ARRESTS = 1[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 0.03.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-0.1, 0.1][39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (0.1, 0.3][39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (0.3, 7.3][39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-0.3, -0.2][39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31

## Fairness of Recourse (using two-sample Kolmogorov-Smirnov test) -- Fair Effectiveness-Cost Trade-Off

In [16]:
# Sizes of the affected population per protected subgroup. These figures are
# hard-coded (copied from the other notebooks), not recomputed here.
affected_pop_sizes = {"BLK": 8072, "WHI": 3271}

# Select from the cumulative rules via the KS-test based selection, with the
# configured `top_count`.
top_rules, unfairness = facts.select_rules_subset_KStest(
    rules_with_cumulative_correctness,
    affected_pop_sizes,
    top_count=top_count,
)

# Print the selection with the cost of each suggested action.
# `show_cumulative_plots=True` is available but left disabled.
print_recourse_report_KStest_cumulative(
    top_rules,
    population_sizes=affected_pop_sizes,
    unfairness=unfairness,
    show_then_costs=True,
)

If [1mPREDICTOR RAT ARRESTS VIOLENT OFFENSES = 0, PREDICTOR RAT GANG AFFILIATION = 0, PREDICTOR RAT VICTIM BATTERY OR ASSAULT = 0, PREDICTOR RAT VICTIM SHOOTING INCIDENTS = 0, SEX CODE CD = F[0m:
	Protected Subgroup '[1mWHI[0m', [34m5.81%[39m covered out of 3271
		Make [1m[31mSEX CODE CD = M[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 100.0.
		Make [1m[31mPREDICTOR RAT ARRESTS VIOLENT OFFENSES = 1[39m, [31mSEX CODE CD = M[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 100.08.
		Make [1m[31mPREDICTOR RAT GANG AFFILIATION = 1[39m, [31mSEX CODE CD = M[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 101.0.
		Make [1m[31mPREDICTOR RAT ARRESTS VIOLENT OFFENSES = 1[39m, [31mPREDICTOR RAT GANG AFFILIATION = 1[39m, [31mSEX CODE CD = M[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 101.08.
	Protected Subgroup '[1mBLK[0m', [34m11.62%[39m covered out of 8072
		Make [1m[31mSEX C

## Fairness of Effectiveness at Recourse Budget -- Equal Effectiveness within Budget (Micro)

In [17]:
# Select from the cumulative rules with the "max-upto-cost" metric, using the
# configured cost budget (`cost_budget`) and `top_count`.
# No filters are applied; the disabled candidates are "remove-contained",
# "remove-fair-rules", "remove-above-thr-cost", "remove-below-thr" and
# "keep-only-min-change".
max_upto_cost_options = dict(
    metric="max-upto-cost",
    sort_strategy="generic-sorting-ignore-exists-subgroup-empty",
    top_count=top_count,
    cost_threshold=cost_budget,
    filter_sequence=[],
    params=params,
)
top_rules, subgroup_costs = facts.select_rules_subset_cumulative(
    rules_with_cumulative_correctness, **max_upto_cost_options
)

# Print the selection with per-subgroup values and the cost of each suggested action.
print_recourse_report_cumulative(
    top_rules,
    subgroup_costs=subgroup_costs,
    show_subgroup_costs=True,
    show_then_costs=True,
    correctness_metric=True,
)

If [1mPREDICTOR RAT GANG AFFILIATION = 0, PREDICTOR RAT NARCOTIC ARRESTS = 0, PREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-0.2, -0.1], PREDICTOR RAT UUW ARRESTS = 0, PREDICTOR RAT VICTIM BATTERY OR ASSAULT = 0[0m:
	Protected Subgroup '[1mWHI[0m', [34m2.21%[39m covered
		Make [1m[31mPREDICTOR RAT NARCOTIC ARRESTS = 1[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 0.03.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-0.1, 0.1][39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (0.1, 0.3][39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (0.3, 7.3][39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-0.3, -0.2][39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31