In [1]:
import pandas as pd
import numpy as np

import os
import sys
# Put the parent directory on the import path (presumably so that the `facts`
# package imported right after this resolves when the notebook is run from its
# own folder). The guard keeps re-runs of the cell from appending duplicates.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    # No trailing backslash here: a line continuation would splice this
    # statement into whatever line follows it.
    sys.path.append(module_path)

import facts
from facts.clean import clean_dataset
from facts import valid_ifthens_with_coverage_correctness, rules2rulesbyif
from facts.models import customLogisticRegression
from facts.parameters import ParameterProxy
from facts.formatting import recourse_report_reverse, print_recourse_report, print_recourse_report_cumulative, print_recourse_report_KStest_cumulative
from facts.utils import load_rules_by_if
from aif360.sklearn.datasets import fetch_compas

In [2]:
# Notebook-wide settings read by the cumulative-metric cells further down.
top_count = 20    # passed as `top_count` to the cumulative / KS-test selections
c_inf = 5         # passed as `c_inf` to the "fairness-of-mean-recourse-cinf" metric
cost_budget = 10  # passed as `cost_threshold` to the "max-upto-cost" metric
cor_thres = 0.5   # passed as `cor_threshold` to the "min-above-corr" metric

In [3]:
# Source CSV ("chicago-ssl-clean.csv") hosted in the fliptest GitHub repository.
# NOTE(review): the URL follows the `master` branch, so the data is not pinned
# to a fixed revision and the cell needs network access.
urlfile = "https://raw.githubusercontent.com/samuel-yeom/fliptest/master/exact-ot/chicago-ssl-clean.csv"

X = pd.read_csv(urlfile)

# Every protected subgroup reported in this notebook is 'BLK' or 'WHI', i.e. a
# value of the race column, so that column is recorded as the sensitive
# attribute (it was previously set to "SEX CODE CD").
sensitive_attribute = "RACE CODE CD"

# Presumably 'SSL' selects the dataset-specific cleaning rules inside
# `clean_dataset` -- confirm against facts.clean.
df = clean_dataset(X, 'SSL')
df.head()

Unnamed: 0,SSL SCORE,PREDICTOR RAT AGE AT LATEST ARREST,PREDICTOR RAT VICTIM SHOOTING INCIDENTS,PREDICTOR RAT VICTIM BATTERY OR ASSAULT,PREDICTOR RAT ARRESTS VIOLENT OFFENSES,PREDICTOR RAT GANG AFFILIATION,PREDICTOR RAT NARCOTIC ARRESTS,PREDICTOR RAT TREND IN CRIMINAL ACTIVITY,PREDICTOR RAT UUW ARRESTS,SEX CODE CD,RACE CODE CD
0,0,20-30,0,1,4,1,1,"(0.3, 7.3]",1,M,BLK
1,0,20-30,0,9,1,1,0,"(0.3, 7.3]",0,M,WHI
2,0,10-20,1,2,1,1,3,"(0.3, 7.3]",0,M,WHI
3,0,10-20,1,1,4,1,0,"(0.3, 7.3]",1,M,BLK
4,0,10-20,0,0,5,0,0,"(0.3, 7.3]",3,M,BLK


In [4]:
# Separate the target column from the feature matrix.
y = df['SSL SCORE']
X = df.drop(columns='SSL SCORE')

# Group the feature columns by dtype using the public `select_dtypes` API
# rather than the private `DataFrame._get_numeric_data` helper. Booleans are
# listed explicitly because "number" alone does not select them.
num_features = X.select_dtypes(include=['number', 'bool']).columns.to_list()
cate_features = X.select_dtypes(include=['object', 'category']).columns.to_list()

# Left empty: no column is passed on as ordinal (`ord_cols`) in this notebook.
ord_features = []

In [5]:
# Rule sets are read from two files given as relative paths; nothing is
# recomputed in this notebook.
atomic_rules_file = "rulesSSL_atomic.data"
cumulative_rules_file = "rulesSSL_cumulative.data"

rules_with_atomic_correctness = load_rules_by_if(atomic_rules_file)
rules_with_cumulative_correctness = load_rules_by_if(cumulative_rules_file)

# Old Metrics

In [6]:
# Per-feature weights handed to the feature-change builder below.
feature_weights = {
    'PREDICTOR RAT AGE AT LATEST ARREST': 10,
    'PREDICTOR RAT VICTIM SHOOTING INCIDENTS': 1,
    'PREDICTOR RAT VICTIM BATTERY OR ASSAULT': 1,
    'PREDICTOR RAT ARRESTS VIOLENT OFFENSES': 1,
    'PREDICTOR RAT GANG AFFILIATION': 1,
    'PREDICTOR RAT NARCOTIC ARRESTS': 1,
    'PREDICTOR RAT TREND IN CRIMINAL ACTIVITY': 1,
    'PREDICTOR RAT UUW ARRESTS': 1,
    'SEX CODE CD': 100,
    'RACE CODE CD': 100,
}

# Categorical columns are passed as `cate_cols`, numeric ones as `num_cols`.
features_with_binary_cost = cate_features
features_with_proportional_cost = num_features

# Build the per-feature change functions and wrap them for the selection calls.
builder_options = dict(
    num_cols=features_with_proportional_cost,
    cate_cols=features_with_binary_cost,
    ord_cols=ord_features,
    feature_weights=feature_weights,
    num_normalization=True,
)
comparators = facts.feature_change_builder(X, **builder_options)
params = ParameterProxy(featureChanges=comparators)

## Weighted Average

In [7]:
# Select the top 50 subgroups under the "weighted-average" metric.
# No filters are active; "remove-contained" and "remove-fair-rules" were left
# disabled in this cell.
selection_options = dict(
    metric="weighted-average",
    sort_strategy="abs-diff-decr",
    top_count=50,
    filter_sequence=[],
    params=params,
)
top_rules, subgroup_costs = facts.select_rules_subset(
    rules_with_atomic_correctness, **selection_options
)

# Report the selected rules together with the per-subgroup aggregate costs.
print_recourse_report(top_rules, subgroup_costs=subgroup_costs, show_subgroup_costs=True)

If [1mPREDICTOR RAT ARRESTS VIOLENT OFFENSES = 0, PREDICTOR RAT NARCOTIC ARRESTS = 3, PREDICTOR RAT VICTIM BATTERY OR ASSAULT = 0, PREDICTOR RAT VICTIM SHOOTING INCIDENTS = 0[0m:
	Protected Subgroup '[1mBLK[0m', [34m1.18%[39m covered
		Make [1m[31mPREDICTOR RAT NARCOTIC ARRESTS = 0[39m[0m with effectiveness [32m54.26%[39m.
		Make [1m[31mPREDICTOR RAT NARCOTIC ARRESTS = 1[39m[0m with effectiveness [32m45.74%[39m.
		Make [1m[31mPREDICTOR RAT ARRESTS VIOLENT OFFENSES = 1[39m, [31mPREDICTOR RAT NARCOTIC ARRESTS = 0[39m[0m with effectiveness [32m0.00%[39m.
		Make [1m[31mPREDICTOR RAT NARCOTIC ARRESTS = 2[39m[0m with effectiveness [32m43.62%[39m.
		[1mAggregate cost[0m of the above recourses = [35m-6.13[39m
	Protected Subgroup '[1mWHI[0m', [34m1.14%[39m covered
		Make [1m[31mPREDICTOR RAT NARCOTIC ARRESTS = 0[39m[0m with effectiveness [32m44.00%[39m.
		Make [1m[31mPREDICTOR RAT NARCOTIC ARRESTS = 1[39m[0m with effectiveness [32m32.00%[39m.


## Minimum Cost Above Threshold -- Equal Cost of Effectiveness (Macro)

In [8]:
# Filters applied, in this order ("remove-fair-rules" was left disabled).
active_filters = [
    "remove-contained",
    "remove-below-thr",
    "keep-only-min-change",
]

# Select the top 50 subgroups under the "min-above-thr" metric with a 0.7
# correctness threshold.
selection = facts.select_rules_subset(
    rules_with_atomic_correctness,
    metric="min-above-thr",
    sort_strategy="generic-sorting-ignore-exists-subgroup-empty",
    top_count=50,
    cor_threshold=0.7,
    filter_sequence=active_filters,
    params=params,
)
top_rules, subgroup_costs = selection

print_recourse_report(top_rules, subgroup_costs=subgroup_costs, show_subgroup_costs=True)

If [1mPREDICTOR RAT AGE AT LATEST ARREST = 10-20, PREDICTOR RAT ARRESTS VIOLENT OFFENSES = 1, PREDICTOR RAT GANG AFFILIATION = 0, PREDICTOR RAT NARCOTIC ARRESTS = 0, PREDICTOR RAT UUW ARRESTS = 0[0m:
	Protected Subgroup '[1mBLK[0m', [34m12.56%[39m covered
		Make [1m[31mPREDICTOR RAT AGE AT LATEST ARREST = 20-30[39m, [31mPREDICTOR RAT ARRESTS VIOLENT OFFENSES = 0[39m[0m with effectiveness [32m97.10%[39m.
		Make [1m[31mPREDICTOR RAT AGE AT LATEST ARREST = 30-40[39m, [31mPREDICTOR RAT ARRESTS VIOLENT OFFENSES = 0[39m[0m with effectiveness [32m99.90%[39m.
		Make [1m[31mPREDICTOR RAT AGE AT LATEST ARREST = 40-50[39m, [31mPREDICTOR RAT ARRESTS VIOLENT OFFENSES = 0[39m[0m with effectiveness [32m100.00%[39m.
		Make [1m[31mPREDICTOR RAT AGE AT LATEST ARREST = 50-60[39m, [31mPREDICTOR RAT ARRESTS VIOLENT OFFENSES = 0[39m[0m with effectiveness [32m100.00%[39m.
		Make [1m[31mPREDICTOR RAT AGE AT LATEST ARREST = 60-70[39m, [31mPREDICTOR RAT ARRESTS VIOLENT 

## Number of Rules Above Threshold -- Equal Choice for Recourse

In [9]:
# Filters applied, in this order ("remove-fair-rules" was left disabled).
active_filters = ["remove-contained", "remove-below-thr"]

# Select the top 50 subgroups under the "num-above-thr" metric with a 0.7
# correctness threshold.
selection_options = dict(
    metric="num-above-thr",
    sort_strategy="generic-sorting-ignore-forall-subgroups-empty",
    top_count=50,
    cor_threshold=0.7,
    filter_sequence=active_filters,
    params=params,
)
top_rules, subgroup_costs = facts.select_rules_subset(
    rules_with_atomic_correctness, **selection_options
)

print_recourse_report(top_rules, subgroup_costs=subgroup_costs, show_subgroup_costs=True)

If [1mPREDICTOR RAT GANG AFFILIATION = 0, PREDICTOR RAT NARCOTIC ARRESTS = 0, PREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (0.3, 7.3], SEX CODE CD = M[0m:
	Protected Subgroup '[1mBLK[0m', [34m7.04%[39m covered
		[31mNo recourses for this subgroup![39m
		[1mAggregate cost[0m of the above recourses = [35m0.00[39m
	Protected Subgroup '[1mWHI[0m', [34m12.31%[39m covered
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-8.200999999999999, -0.3][39m[0m with effectiveness [32m91.11%[39m.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-0.1, 0.1][39m[0m with effectiveness [32m82.96%[39m.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-0.2, -0.1][39m[0m with effectiveness [32m82.96%[39m.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (0.1, 0.3][39m[0m with effectiveness [32m82.96%[39m.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-8.200999999999999, -0.3][39m, [31mSEX CODE CD = F[39m[0m with effecti

# New Metrics

In [10]:
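# Reference only (kept commented out): `rules_by_if`, `X_test` and `model` are not defined in this notebook.
# The race column of this dataset is "RACE CODE CD" (there is no "race" column).
# rules_with_cumulative = facts.cum_corr_costs_all(rules_by_if, X_test, model, sensitive_attribute="RACE CODE CD", params=params)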

In [11]:
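# Reference only (kept commented out): saves `rules_with_cumulative` under the file name this notebook loads its cumulative rules from.
# from facts.utils import load_rules_by_if, save_rules_by_if
# save_rules_by_if("rulesSSL_cumulative.data", rules_with_cumulative)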

## Fairness of Mean Recourse Cost ($c_\infty$ implementation) -- Equal Mean Recourse

In [12]:
# Select `top_count` subgroups under the "fairness-of-mean-recourse-cinf"
# metric, using the notebook-wide `c_inf` setting. No filters are active;
# "remove-contained" and "remove-fair-rules" were left disabled in this cell.
selection_options = dict(
    metric="fairness-of-mean-recourse-cinf",
    c_inf=c_inf,
    sort_strategy="generic-sorting",
    top_count=top_count,
    filter_sequence=[],
    params=params,
)
top_rules, subgroup_costs = facts.select_rules_subset_cumulative(
    rules_with_cumulative_correctness, **selection_options
)

# Report the rules with both the subgroup costs and the cost of each action.
print_recourse_report_cumulative(
    top_rules,
    subgroup_costs=subgroup_costs,
    show_subgroup_costs=True,
    show_then_costs=True,
)

If [1mPREDICTOR RAT GANG AFFILIATION = 0, PREDICTOR RAT NARCOTIC ARRESTS = 0, PREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-0.2, -0.1], PREDICTOR RAT UUW ARRESTS = 0, PREDICTOR RAT VICTIM BATTERY OR ASSAULT = 0, SEX CODE CD = M[0m:
	Protected Subgroup '[1mBLK[0m', [34m1.13%[39m covered
		Make [1m[31mPREDICTOR RAT NARCOTIC ARRESTS = 1[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 0.03.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-8.200999999999999, -0.3][39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-0.1, 0.1][39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (0.1, 0.3][39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (0.3, 7.3][39m[0m with effectiveness [32m0.00%[39m and counterfact

## Fairness of Mean Recourse Cost (conditional expectation implementation) -- Equal Conditional Mean Recourse

In [13]:
# Select `top_count` subgroups under the "fairness-of-mean-recourse-conditional"
# metric. No filters are active; "remove-contained" and "remove-fair-rules"
# were left disabled in this cell.
selection = facts.select_rules_subset_cumulative(
    rules_with_cumulative_correctness,
    metric="fairness-of-mean-recourse-conditional",
    sort_strategy="generic-sorting-ignore-exists-subgroup-empty",
    top_count=top_count,
    filter_sequence=[],
    params=params,
)
top_rules, subgroup_costs = selection

report_options = dict(
    subgroup_costs=subgroup_costs,
    show_subgroup_costs=True,
    show_then_costs=True,
)
print_recourse_report_cumulative(top_rules, **report_options)

If [1mPREDICTOR RAT AGE AT LATEST ARREST = 10-20, PREDICTOR RAT GANG AFFILIATION = 0, PREDICTOR RAT NARCOTIC ARRESTS = 0, PREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-0.2, -0.1], PREDICTOR RAT UUW ARRESTS = 0, PREDICTOR RAT VICTIM BATTERY OR ASSAULT = 0[0m:
	Protected Subgroup '[1mBLK[0m', [34m1.67%[39m covered
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-8.200999999999999, -0.3][39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-0.1, 0.1][39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (0.1, 0.3][39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31mPREDICTOR RAT AGE AT LATEST ARREST = 20-30[39m[0m with effectiveness [32m97.74%[39m and counterfactual cost = 10.0.
		Make [1m[31mPREDICTOR RAT AGE AT LATEST ARREST = 30-40[39m[0m with effectiveness [32m1

## Fairness of Recourse at Effectiveness Level -- Equal Cost of Effectiveness (Micro)

In [14]:
# Select `top_count` subgroups under the "min-above-corr" metric, with the
# notebook-wide `cor_thres` as the correctness threshold.
# No filters are active; the ones left disabled in this cell were
# "remove-contained", "remove-fair-rules", "keep-cheap-rules-above-thr-cor"
# and "keep-only-min-change".
selection_options = dict(
    metric="min-above-corr",
    sort_strategy="generic-sorting-ignore-exists-subgroup-empty",
    top_count=top_count,
    cor_threshold=cor_thres,
    filter_sequence=[],
    params=params,
)
top_rules, subgroup_costs = facts.select_rules_subset_cumulative(
    rules_with_cumulative_correctness, **selection_options
)

print_recourse_report_cumulative(
    top_rules,
    subgroup_costs=subgroup_costs,
    show_subgroup_costs=True,
    show_then_costs=True,
)

If [1mPREDICTOR RAT AGE AT LATEST ARREST = 10-20, PREDICTOR RAT ARRESTS VIOLENT OFFENSES = 0, PREDICTOR RAT UUW ARRESTS = 0, PREDICTOR RAT VICTIM BATTERY OR ASSAULT = 1[0m:
	Protected Subgroup '[1mBLK[0m', [34m5.68%[39m covered
		Make [1m[31mPREDICTOR RAT VICTIM BATTERY OR ASSAULT = 0[39m[0m with effectiveness [32m38.85%[39m and counterfactual cost = 0.1.
		Make [1m[31mPREDICTOR RAT AGE AT LATEST ARREST = 20-30[39m, [31mPREDICTOR RAT VICTIM BATTERY OR ASSAULT = 0[39m[0m with effectiveness [32m94.92%[39m and counterfactual cost = 10.1.
		Make [1m[31mPREDICTOR RAT AGE AT LATEST ARREST = 30-40[39m, [31mPREDICTOR RAT VICTIM BATTERY OR ASSAULT = 0[39m[0m with effectiveness [32m99.56%[39m and counterfactual cost = 10.1.
		Make [1m[31mPREDICTOR RAT AGE AT LATEST ARREST = 40-50[39m, [31mPREDICTOR RAT VICTIM BATTERY OR ASSAULT = 0[39m[0m with effectiveness [32m100.00%[39m and counterfactual cost = 10.1.
		Make [1m[31mPREDICTOR RAT AGE AT LATEST ARREST = 50-

## Fairness of Effectiveness -- Equal Effectiveness

In [15]:
# Select `top_count` subgroups under the "total-correctness" metric.
# No filters are active; "remove-contained", "remove-fair-rules" and
# "keep-only-min-change" were left disabled in this cell.
selection = facts.select_rules_subset_cumulative(
    rules_with_cumulative_correctness,
    metric="total-correctness",
    sort_strategy="generic-sorting-ignore-forall-subgroups-empty",
    top_count=top_count,
    filter_sequence=[],
    params=params,
)
top_rules, subgroup_costs = selection

# `correctness_metric=True` is passed because the metric here is a
# correctness value (presumably it changes how the subgroup figure is
# labelled -- confirm in facts.formatting).
report_options = dict(
    subgroup_costs=subgroup_costs,
    show_subgroup_costs=True,
    show_then_costs=True,
    correctness_metric=True,
)
print_recourse_report_cumulative(top_rules, **report_options)

If [1mPREDICTOR RAT GANG AFFILIATION = 0, PREDICTOR RAT NARCOTIC ARRESTS = 0, PREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-0.2, -0.1], PREDICTOR RAT UUW ARRESTS = 0, PREDICTOR RAT VICTIM BATTERY OR ASSAULT = 0[0m:
	Protected Subgroup '[1mBLK[0m', [34m1.68%[39m covered
		Make [1m[31mPREDICTOR RAT NARCOTIC ARRESTS = 1[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 0.03.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-8.200999999999999, -0.3][39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-0.1, 0.1][39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (0.1, 0.3][39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (0.3, 7.3][39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
	

## Fairness of Recourse (using two-sample Kolmogorov-Smirnov test) -- Fair Effectiveness-Cost Trade-Off

In [16]:
# Hard-coded population sizes per protected subgroup, copied from the other
# notebooks rather than computed in this one.
affected_pop_sizes = {"BLK": 8072, "WHI": 3271}

ks_selection = facts.select_rules_subset_KStest(
    rules_with_cumulative_correctness,
    affected_pop_sizes,
    top_count=top_count,
)
top_rules, unfairness = ks_selection

# `show_cumulative_plots=True` can additionally be passed; it was left
# disabled in this cell.
print_recourse_report_KStest_cumulative(
    top_rules,
    population_sizes=affected_pop_sizes,
    unfairness=unfairness,
    show_then_costs=True,
)

If [1mPREDICTOR RAT ARRESTS VIOLENT OFFENSES = 0, PREDICTOR RAT NARCOTIC ARRESTS = 0, PREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (0.3, 7.3], PREDICTOR RAT UUW ARRESTS = 0, PREDICTOR RAT VICTIM BATTERY OR ASSAULT = 0[0m:
	Protected Subgroup '[1mBLK[0m', [34m6.02%[39m covered out of 8072
		Make [1m[31mPREDICTOR RAT NARCOTIC ARRESTS = 1[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 0.03.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (0.1, 0.3][39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-8.200999999999999, -0.3][39m[0m with effectiveness [32m99.58%[39m and counterfactual cost = 1.0.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-0.1, 0.1][39m[0m with effectiveness [32m99.58%[39m and counterfactual cost = 1.0.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-0.2, -0.1][39m[0m with effectiveness [32m99.58%[39m and count

## Fairness of Effectiveness at Recourse Budget -- Equal Effectiveness within Budget (Micro)

In [17]:
# Select `top_count` subgroups under the "max-upto-cost" metric, with the
# notebook-wide `cost_budget` as the cost threshold.
# No filters are active; the ones left disabled in this cell were
# "remove-contained", "remove-fair-rules", "remove-above-thr-cost",
# "remove-below-thr" and "keep-only-min-change".
selection_options = dict(
    metric="max-upto-cost",
    sort_strategy="generic-sorting-ignore-exists-subgroup-empty",
    top_count=top_count,
    cost_threshold=cost_budget,
    filter_sequence=[],
    params=params,
)
top_rules, subgroup_costs = facts.select_rules_subset_cumulative(
    rules_with_cumulative_correctness, **selection_options
)

print_recourse_report_cumulative(
    top_rules,
    subgroup_costs=subgroup_costs,
    show_subgroup_costs=True,
    show_then_costs=True,
    correctness_metric=True,
)

If [1mPREDICTOR RAT GANG AFFILIATION = 0, PREDICTOR RAT NARCOTIC ARRESTS = 0, PREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-0.2, -0.1], PREDICTOR RAT UUW ARRESTS = 0, PREDICTOR RAT VICTIM BATTERY OR ASSAULT = 0[0m:
	Protected Subgroup '[1mBLK[0m', [34m1.68%[39m covered
		Make [1m[31mPREDICTOR RAT NARCOTIC ARRESTS = 1[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 0.03.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-8.200999999999999, -0.3][39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (-0.1, 0.1][39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (0.1, 0.3][39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31mPREDICTOR RAT TREND IN CRIMINAL ACTIVITY = (0.3, 7.3][39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
	

## Get ranking of subgroups based on metrics

In [18]:
def _attach_atomic_correctness(cumulative_thens, atomic_thens):
    """Pair each action with its atomic and its cumulative correctness.

    `cumulative_thens` holds (then, cumulative correctness, cost) triples and
    `atomic_thens` holds (then, atomic correctness) pairs. The result keeps
    the order of `cumulative_thens` and drops the cost.
    """
    atomic_by_then = dict(atomic_thens)
    return [
        (then, atomic_by_then[then], cumulative_cor)
        for then, cumulative_cor, _cost in cumulative_thens
    ]


# Same nesting as the cumulative rules (if-clause -> subgroup -> (coverage,
# actions)), with each action carrying both correctness values.
rules_with_both = {
    if_clause: {
        subgroup: (
            coverage,
            _attach_atomic_correctness(
                cumulative_thens,
                rules_with_atomic_correctness[if_clause][subgroup][1],
            ),
        )
        for subgroup, (coverage, cumulative_thens) in per_subgroup.items()
    }
    for if_clause, per_subgroup in rules_with_cumulative_correctness.items()
}

In [19]:
from facts.fairness_metrics_aggr import make_table, auto_budget_calculation

# Derive the cost budgets from the cumulative rules. The correctness threshold
# comes from the notebook-wide `cor_thres` setting (0.5) instead of repeating
# that value as a literal, so the two cannot drift apart.
budgets = auto_budget_calculation(
    rules_with_cumulative_correctness,
    cor_thres=cor_thres,
    percentiles=[0.3, 0.6, 0.9],
)
budgets

[0.15229885057471265, 1.0, 10.0]

In [20]:
# NOTE: from here on `df` no longer refers to the cleaned dataset; it is
# rebound to the metrics table, which the following cells read under that name.
#
# The two unwanted metrics are removed with a chained, non-mutating `drop`.
# `level=0` names the column level the labels live on, which avoids the
# "dropping on a non-lexsorted multi-index without a level parameter"
# PerformanceWarning.
df = make_table(
    rules_with_both,
    sensitive_attribute_vals=["WHI", "BLK"],
    effectiveness_thresholds=[0.3, 0.7],
    cost_budgets=budgets,
    params=params,
).drop(columns=['weighted-average', 'mean-cost-cinf'], level=0)

dropping on a non-lexsorted multi-index without a level parameter may impact performance.


In [21]:
# Keep only the first column for every repeated column label.
is_repeated_column = df.columns.duplicated()
df = df.loc[:, ~is_repeated_column]
df.head()

Unnamed: 0_level_0,subgroup,"(Equal Cost of Effectiveness(Macro), 0.3)","(Equal Cost of Effectiveness(Macro), 0.3)","(Equal Cost of Effectiveness(Macro), 0.7)","(Equal Cost of Effectiveness(Macro), 0.7)","(Equal Choice for Recourse, 0.3)","(Equal Choice for Recourse, 0.3)","(Equal Choice for Recourse, 0.7)","(Equal Choice for Recourse, 0.7)",Equal Effectiveness,...,"(Equal Effectiveness within Budget, 10.0)","(Equal Effectiveness within Budget, 10.0)","(Equal Cost of Effectiveness(Micro), 0.3)","(Equal Cost of Effectiveness(Micro), 0.3)","(Equal Cost of Effectiveness(Micro), 0.7)","(Equal Cost of Effectiveness(Micro), 0.7)",Equal(Conditional Mean Recourse),Equal(Conditional Mean Recourse),Fair Effectiveness-Cost Trade-Off,Fair Effectiveness-Cost Trade-Off
Unnamed: 0_level_1,subgroup,WHI,BLK,WHI,BLK,WHI,BLK,WHI,BLK,WHI,...,WHI,BLK,WHI,BLK,WHI,BLK,WHI,BLK,value,bias
0,PREDICTOR RAT UUW ARRESTS = 0,inf,inf,inf,inf,0,0,0,0,0.0,...,0.0,0.0,inf,inf,inf,inf,inf,inf,0.0,BLK
1,SEX CODE CD = M,inf,inf,inf,inf,0,0,0,0,0.0,...,inf,inf,inf,inf,inf,inf,inf,inf,0.0,BLK
2,PREDICTOR RAT AGE AT LATEST ARREST = 10-20,10.0,10.0,10.0,10.0,-5,-5,-5,-5,0.982002,...,0.982002,0.992935,10.0,10.0,10.0,10.0,10.0,10.0,0.059872,BLK
3,PREDICTOR RAT VICTIM BATTERY OR ASSAULT = 0,inf,inf,inf,inf,0,0,0,0,0.0,...,0.0,0.0,inf,inf,inf,inf,inf,inf,0.0,BLK
4,"PREDICTOR RAT VICTIM BATTERY OR ASSAULT = 0, P...",inf,inf,inf,inf,0,0,0,0,0.0,...,0.0,0.0,inf,inf,inf,inf,inf,inf,0.0,BLK


In [30]:
from facts.fairness_metrics_aggr import get_diff_table, get_comb_df, get_analysis_df

sensitive_attribute_vals = ["WHI", "BLK"]

# Table of differences between the two protected subgroups (WHI and BLK) for
# each metric, indexed by subgroup description.
diff = get_diff_table(df, sensitive_attribute_vals=sensitive_attribute_vals, with_abs=True)
diff = diff.set_index('subgroup')

# Rank the subgroups per metric: zero differences are masked out first, then a
# dense descending rank gives rank 1 to the largest difference. The non-numeric
# 'bias' column is left out of the ranking.
bias_column = ('Fair Effectiveness-Cost Trade-Off', 'bias')
ranked = (
    diff.drop(columns=[bias_column])
    .mask(diff == 0)
    .rank(ascending=False, axis=0, method='dense')
)
# Entries without a rank (the masked zero differences) are labelled "Fair".
ranked = ranked.replace(np.nan, "Fair")

dropping on a non-lexsorted multi-index without a level parameter may impact performance.


In [31]:
# Metrics handed to `get_comb_df` as `rev_bias_metrics` (presumably those where
# a larger value is the favourable one, so the bias direction is reversed --
# confirm in facts.fairness_metrics_aggr).
rev_bias_metrics = [
    'Equal Effectiveness',
    'Equal Effectiveness within Budget',
]

# Combine the metric table, ranks and differences, then summarise them.
comb_df = get_comb_df(df, ranked, diff, rev_bias_metrics, sensitive_attribute_vals)
analysis_df = get_analysis_df(comb_df, sensitive_attribute_vals)

dropping on a non-lexsorted multi-index without a level parameter may impact performance.
The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


In [32]:
# Preview the first rows of the combined table (rank, score and "bias against" per metric).
comb_df.head()

Unnamed: 0_level_0,"(Equal Cost of Effectiveness(Macro), 0.3)","(Equal Cost of Effectiveness(Macro), 0.3)","(Equal Cost of Effectiveness(Macro), 0.3)","(Equal Cost of Effectiveness(Macro), 0.7)","(Equal Cost of Effectiveness(Macro), 0.7)","(Equal Cost of Effectiveness(Macro), 0.7)","(Equal Choice for Recourse, 0.3)","(Equal Choice for Recourse, 0.3)","(Equal Choice for Recourse, 0.3)","(Equal Choice for Recourse, 0.7)",...,"(Equal Cost of Effectiveness(Micro), 0.3)","(Equal Cost of Effectiveness(Micro), 0.7)","(Equal Cost of Effectiveness(Micro), 0.7)","(Equal Cost of Effectiveness(Micro), 0.7)",Equal(Conditional Mean Recourse),Equal(Conditional Mean Recourse),Equal(Conditional Mean Recourse),Fair Effectiveness-Cost Trade-Off,Fair Effectiveness-Cost Trade-Off,Fair Effectiveness-Cost Trade-Off
Unnamed: 0_level_1,rank,score,bias against,rank,score,bias against,rank,score,bias against,rank,...,bias against,rank,score,bias against,rank,score,bias against,rank,score,bias against
subgroup,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
PREDICTOR RAT UUW ARRESTS = 0,Fair,0.0,Fair,Fair,0.0,Fair,Fair,0.0,Fair,Fair,...,Fair,Fair,0.0,Fair,Fair,0.0,Fair,Fair,0.0,BLK
SEX CODE CD = M,Fair,0.0,Fair,Fair,0.0,Fair,Fair,0.0,Fair,Fair,...,Fair,Fair,0.0,Fair,Fair,0.0,Fair,Fair,0.0,BLK
PREDICTOR RAT AGE AT LATEST ARREST = 10-20,Fair,0.0,Fair,Fair,0.0,Fair,Fair,0.0,Fair,Fair,...,Fair,Fair,0.0,Fair,3921.0,3.552714e-15,BLK,4222.0,0.059872,BLK
PREDICTOR RAT VICTIM BATTERY OR ASSAULT = 0,Fair,0.0,Fair,Fair,0.0,Fair,Fair,0.0,Fair,Fair,...,Fair,Fair,0.0,Fair,Fair,0.0,Fair,Fair,0.0,BLK
"PREDICTOR RAT VICTIM BATTERY OR ASSAULT = 0, PREDICTOR RAT VICTIM SHOOTING INCIDENTS = 0",Fair,0.0,Fair,Fair,0.0,Fair,Fair,0.0,Fair,Fair,...,Fair,Fair,0.0,Fair,Fair,0.0,Fair,Fair,0.0,BLK


In [33]:
# Display the per-metric summary: rank-1 count and "bias against" counts for WHI and BLK.
analysis_df

Unnamed: 0,Rank = 1 Count,WHI bias against Count,BLK bias against Count
"(Equal Cost of Effectiveness(Macro), 0.7)",627,1435,139
"(Equal Cost of Effectiveness(Micro), 0.7)",627,1435,139
"(Equal Cost of Effectiveness(Macro), 0.3)",371,1017,262
"(Equal Cost of Effectiveness(Micro), 0.3)",371,1017,262
Equal(Conditional Mean Recourse),116,3489,1361
"(Equal Choice for Recourse, 0.7)",16,2678,399
Fair Effectiveness-Cost Trade-Off,106,1072,5479
"(Equal Effectiveness within Budget, 0.15229885057471265)",40,1987,497
"(Equal Effectiveness within Budget, 1.0)",32,4090,963
Equal Effectiveness,4,695,3311
