Datasets used: 
- `adult (Adult)` :<br>The UCI Adult dataset contains 14 features, which can be divided into two categories: demographic and income-related.<br> The demographic features include:

    age: continuous.<br>
    fnlwgt : continuous, represents final weight, which is the number of units in the target population that the responding unit represents.<br>
    workclass: categorical, with values 'Private', 'Local-gov', 'Self-emp-not-inc', 'Federal-gov', 'State-gov', 'Self-emp-inc', 'Without-pay'.<br>
    education: categorical, with values '11th', 'HS-grad', 'Assoc-acdm', 'Some-college', '10th', ..., '9th', 'Doctorate', '12th', '1st-4th', 'Preschool'.<br>
    education-num: continuous with values 1 to 16, one number assigned to each label of education feature.<br>
    marital-status: categorical, with values 'Never-married', 'Married-civ-spouse', 'Widowed', 'Separated', 'Divorced', 'Married-spouse-absent', 'Married-AF-spouse'.<br>
    occupation: categorical, with values such as 'Machine-op-inspct', 'Farming-fishing', 'Protective-serv'.<br>
    relationship: categorical, with values 'Own-child', 'Husband', 'Not-in-family', 'Unmarried', 'Wife', 'Other-relative'.<br>
    race: categorical, with values such as white, black, and Asian.<br>
    sex: categorical, with values male and female.<br>
    
    The income-related features include:<br>

    hours-per-week: continuous.<br>
    native-country: categorical, with values such as United-States, Mexico, and Germany.<br>
    capital-gain: continuous, represents the amount of money an individual has gained from the sale of investments such as stocks, bonds, or real estate.<br>
    capital-loss: continuous, represents the amount of money an individual has lost from the sale of investments such as stocks, bonds, or real estate.<br>
    The target feature is the income, which is binary:<br>
    income: categorical, with values less than or equal to 50K and greater than 50K.<br>

Reference Links: https://archive.ics.uci.edu/ml/datasets/adult

In [1]:
import pandas as pd
import numpy as np

import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import facts
from facts.clean import clean_dataset
from facts import valid_ifthens_with_coverage_correctness, rules2rulesbyif
from facts.models import customLogisticRegression
from facts.parameters import ParameterProxy
from facts.formatting import recourse_report_reverse, print_recourse_report, print_recourse_report_cumulative, print_recourse_report_KStest_cumulative
from facts.utils import load_rules_by_if
from facts.fairness_metrics_aggr import get_diff_table, get_comb_df, get_analysis_dfs

In [2]:
# Correctness threshold; passed as `cor_threshold` to the rule-selection calls below.
cor_thres = 0.5
# Cost budget; passed as `cost_threshold` to the "max-upto-cost" metric.
cost_budget = 10
# Passed as `top_count` to every rule-selection call in this notebook.
top_count = 20
# Passed as `c_inf` to the "fairness-of-mean-recourse-cinf" metric.
c_inf = 5

# Dataset

In [3]:
# Raw Adult CSV hosted in the FairTest repository.
urlfile = "https://raw.githubusercontent.com/columbia/fairtest/master/data/adult/adult.csv"

X = pd.read_csv(urlfile)
df = clean_dataset(X, "adult")

# Left endpoints of the age intervals present in the cleaned data, in ascending order.
age = sorted(val.left for val in df.age.unique())

df.head()

Unnamed: 0,age,Workclass,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,"(34.0, 41.0]",State-gov,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,FullTime,United-States,0
1,"(41.0, 50.0]",Self-emp-not-inc,13,Married-civ-spouse,Exec-managerial,Married,White,Male,0,0,PartTime,United-States,0
2,"(34.0, 41.0]",Private,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,FullTime,United-States,0
3,"(50.0, 90.0]",Private,7,Married-civ-spouse,Handlers-cleaners,Married,Black,Male,0,0,FullTime,United-States,0
4,"(26.0, 34.0]",Private,13,Married-civ-spouse,Prof-specialty,Married,Black,Female,0,0,FullTime,Cuba,0


In [4]:
# Split the cleaned frame into the target column and the feature matrix.
y = df['income']
X = df.drop(columns='income')

# Group the feature columns by dtype. `select_dtypes` is the public pandas API and
# replaces the private `DataFrame._get_numeric_data()` helper used before.
# NOTE(review): the two may differ for bool columns; none appear in the frame displayed above.
num_features = X.select_dtypes(include='number').columns.to_list()
cate_features = X.select_dtypes(include=['object', 'category']).columns.to_list()
# Passed as `ord_cols` when the feature-change comparators are built.
ord_features = ['hours-per-week']

# Rules

In [5]:
# Precomputed artifacts for the `race` attribute (the file names carry seed 131313).
# The first file unpacks into three objects: rules with atomic correctness, the test set and the model.
# NOTE(review): presumably these files are deserialized objects — only load them from a trusted source.
rules_with_atomic_correctness, Xtest, model = load_rules_by_if("rules-Xtest-model-2_race (seed 131313).data")
# The second file holds the rules annotated with cumulative correctness.
rules_with_cumulative_correctness = load_rules_by_if("rulesAdultFairTest_race_cumulative (seed 131313).data")

In [6]:
# Per-feature weights handed to the feature-change builder.
feature_weights = {
    "race": 100,
    "sex": 100,
    "marital-status": 5,
    "relationship": 5,
    "age": 10,
    "occupation": 4,
    "Workclass": 2,
    "native-country": 4,
    "hours-per-week": 2,
    "capital-gain": 1,
    "capital-loss": 1,
    "education-num": 3,
}
# Categorical columns go in as `cate_cols`, numeric columns as `num_cols`.
features_with_binary_cost = cate_features
features_with_proportional_cost = num_features

comparators = facts.feature_change_builder(
    X,
    num_cols=features_with_proportional_cost,
    cate_cols=features_with_binary_cost,
    ord_cols=ord_features,
    feature_weights=feature_weights,
    num_normalization=True,
    feats_to_normalize=["capital-gain", "capital-loss"],
)
params = ParameterProxy(featureChanges=comparators)

# Macro Viewpoint

## Minimum Cost Above Threshold -- Equal Cost of Effectiveness (Macro)

In [7]:
# Select rules with the "min-above-thr" metric at correctness threshold `cor_thres`.
# Optional filters (all currently disabled): "remove-contained", "remove-below-thr",
# "remove-fair-rules", "keep-only-min-change".
top_rules, subgroup_costs = facts.select_rules_subset(
    rules_with_atomic_correctness,
    metric="min-above-thr",
    sort_strategy="generic-sorting-ignore-exists-subgroup-empty",
    top_count=top_count,
    cor_threshold=cor_thres,
    filter_sequence=[],
    params=params,
)

print_recourse_report(top_rules, subgroup_costs=subgroup_costs, show_subgroup_costs=True)

If [1mWorkclass =  Private, age = (34.0, 41.0], capital-loss = 0, hours-per-week = FullTime, marital-status =  Never-married, relationship =  Not-in-family[0m:
	Protected Subgroup '[1m Non-White[0m', [34m1.09%[39m covered
		Make [1m[31mage = (41.0, 50.0][39m, [31mhours-per-week = OverTime[39m, [31mmarital-status =  Married-civ-spouse[39m, [31mrelationship =  Married[39m[0m with effectiveness [32m55.56%[39m.
		Make [1m[31mage = (41.0, 50.0][39m, [31mmarital-status =  Married-civ-spouse[39m, [31mrelationship =  Married[39m[0m with effectiveness [32m50.00%[39m.
		Make [1m[31mhours-per-week = OverTime[39m, [31mmarital-status =  Married-civ-spouse[39m, [31mrelationship =  Married[39m[0m with effectiveness [32m50.00%[39m.
		Make [1m[31mmarital-status =  Married-civ-spouse[39m, [31mrelationship =  Married[39m[0m with effectiveness [32m50.00%[39m.
		Make [1m[31mage = (50.0, 90.0][39m, [31mhours-per-week = OverTime[39m, [31mmarital-status =  M

## Number of Rules Above Threshold -- Equal Choice for Recourse

In [8]:
# Select rules with the "num-above-thr" metric at correctness threshold `cor_thres`.
# Optional filters (all currently disabled): "remove-contained", "remove-below-thr",
# "remove-fair-rules".
top_rules, subgroup_costs = facts.select_rules_subset(
    rules_with_atomic_correctness,
    metric="num-above-thr",
    sort_strategy="generic-sorting-ignore-forall-subgroups-empty",
    top_count=top_count,
    cor_threshold=cor_thres,
    filter_sequence=[],
    params=params,
)

print_recourse_report(top_rules, subgroup_costs=subgroup_costs, show_subgroup_costs=True)

If [1mWorkclass =  Private, hours-per-week = OverTime, marital-status =  Never-married, relationship =  Not-in-family, sex =  Male[0m:
	Protected Subgroup '[1m Non-White[0m', [34m1.09%[39m covered
		Make [1m[31mmarital-status =  Married-civ-spouse[39m, [31mrelationship =  Married[39m[0m with effectiveness [32m61.11%[39m.
		Make [1m[31mhours-per-week = FullTime[39m, [31mmarital-status =  Married-civ-spouse[39m, [31mrelationship =  Married[39m[0m with effectiveness [32m50.00%[39m.
		Make [1m[31mhours-per-week = BrainDrain[39m, [31mmarital-status =  Married-civ-spouse[39m, [31mrelationship =  Married[39m[0m with effectiveness [32m61.11%[39m.
		Make [1m[31mWorkclass =  Self-emp-inc[39m, [31mmarital-status =  Married-civ-spouse[39m, [31mrelationship =  Married[39m[0m with effectiveness [32m61.11%[39m.
		Make [1m[31mWorkclass =  Local-gov[39m, [31mhours-per-week = FullTime[39m, [31mmarital-status =  Married-civ-spouse[39m, [31mrelationship 

# New Metrics

## Fairness of Mean Recourse Cost ($c_\infty$ implementation) -- Equal Mean Recourse

In [9]:
# Select cumulative rules with the "fairness-of-mean-recourse-cinf" metric, using `c_inf`
# from the configuration cell.
# Optional filters (all currently disabled): "remove-contained", "remove-fair-rules".
top_rules, subgroup_costs = facts.select_rules_subset_cumulative(
    rules_with_cumulative_correctness,
    metric="fairness-of-mean-recourse-cinf",
    c_inf=c_inf,
    sort_strategy="generic-sorting",
    top_count=top_count,
    filter_sequence=[],
    params=params,
)

print_recourse_report_cumulative(
    top_rules,
    subgroup_costs=subgroup_costs,
    show_subgroup_costs=True,
    show_then_costs=True,
)

If [1mcapital-gain = 0, capital-loss = 0, education-num = 10, marital-status =  Divorced, native-country =  United-States, relationship =  Unmarried[0m:
	Protected Subgroup '[1m Non-White[0m', [34m1.81%[39m covered
		Make [1m[31mmarital-status =  Married-civ-spouse[39m, [31mrelationship =  Married[39m[0m with effectiveness [32m43.33%[39m and counterfactual cost = 11.0.
		Make [1m[31meducation-num = 11[39m, [31mmarital-status =  Married-civ-spouse[39m, [31mrelationship =  Married[39m[0m with effectiveness [32m66.67%[39m and counterfactual cost = 31.0.
		Make [1m[31meducation-num = 12[39m, [31mmarital-status =  Married-civ-spouse[39m, [31mrelationship =  Married[39m[0m with effectiveness [32m76.67%[39m and counterfactual cost = 51.0.
		[1mAggregate cost[0m of the above recourses = [35m17.10[39m
	Protected Subgroup '[1m White[0m', [34m1.31%[39m covered
		Make [1m[31mmarital-status =  Married-civ-spouse[39m, [31mrelationship =  Married[39m[0m

## Fairness of Mean Recourse Cost (conditional expectation implementation) -- Equal Conditional Mean Recourse

In [10]:
# Select cumulative rules with the "fairness-of-mean-recourse-conditional" metric.
# Optional filters (all currently disabled): "remove-contained", "remove-fair-rules".
top_rules, subgroup_costs = facts.select_rules_subset_cumulative(
    rules_with_cumulative_correctness,
    metric="fairness-of-mean-recourse-conditional",
    sort_strategy="generic-sorting-ignore-exists-subgroup-empty",
    top_count=top_count,
    filter_sequence=[],
    params=params,
)

print_recourse_report_cumulative(
    top_rules,
    subgroup_costs=subgroup_costs,
    show_subgroup_costs=True,
    show_then_costs=True,
)

If [1mage = (34.0, 41.0], capital-gain = 0, education-num = 9, marital-status =  Never-married[0m:
	Protected Subgroup '[1m Non-White[0m', [34m2.48%[39m covered
		Make [1m[31mmarital-status =  Married-civ-spouse[39m[0m with effectiveness [32m2.44%[39m and counterfactual cost = 1.0.
		Make [1m[31mage = (41.0, 50.0][39m, [31mmarital-status =  Married-civ-spouse[39m[0m with effectiveness [32m2.44%[39m and counterfactual cost = 2.0.
		Make [1m[31mage = (50.0, 90.0][39m, [31mmarital-status =  Married-civ-spouse[39m[0m with effectiveness [32m2.44%[39m and counterfactual cost = 2.0.
		Make [1m[31meducation-num = 10[39m, [31mmarital-status =  Married-civ-spouse[39m[0m with effectiveness [32m2.44%[39m and counterfactual cost = 21.0.
		Make [1m[31mage = (41.0, 50.0][39m, [31meducation-num = 10[39m, [31mmarital-status =  Married-civ-spouse[39m[0m with effectiveness [32m2.44%[39m and counterfactual cost = 22.0.
		Make [1m[31mage = (50.0, 90.0][39m, 

## Fairness of Recourse at Effectiveness Level -- Equal Cost of Effectiveness (Micro)

In [11]:
# Select cumulative rules with the "min-above-corr" metric at correctness threshold `cor_thres`.
# Optional filters (all currently disabled): "remove-contained", "remove-fair-rules",
# "keep-cheap-rules-above-thr-cor", "keep-only-min-change".
top_rules, subgroup_costs = facts.select_rules_subset_cumulative(
    rules_with_cumulative_correctness,
    metric="min-above-corr",
    sort_strategy="generic-sorting-ignore-exists-subgroup-empty",
    top_count=top_count,
    cor_threshold=cor_thres,
    filter_sequence=[],
    params=params,
)

print_recourse_report_cumulative(
    top_rules,
    subgroup_costs=subgroup_costs,
    show_subgroup_costs=True,
    show_then_costs=True,
)

If [1meducation-num = 9, marital-status =  Married-civ-spouse, occupation =  Other-service[0m:
	Protected Subgroup '[1m Non-White[0m', [34m2.12%[39m covered
		Make [1m[31moccupation =  Craft-repair[39m[0m with effectiveness [32m14.29%[39m and counterfactual cost = 1.0.
		Make [1m[31moccupation =  Adm-clerical[39m[0m with effectiveness [32m17.14%[39m and counterfactual cost = 1.0.
		Make [1m[31moccupation =  Sales[39m[0m with effectiveness [32m20.00%[39m and counterfactual cost = 1.0.
		Make [1m[31moccupation =  Exec-managerial[39m[0m with effectiveness [32m60.00%[39m and counterfactual cost = 1.0.
		Make [1m[31meducation-num = 10[39m, [31moccupation =  Craft-repair[39m[0m with effectiveness [32m60.00%[39m and counterfactual cost = 21.0.
		Make [1m[31meducation-num = 10[39m, [31moccupation =  Adm-clerical[39m[0m with effectiveness [32m60.00%[39m and counterfactual cost = 21.0.
		Make [1m[31meducation-num = 10[39m, [31moccupation =  Sales

## Fairness of Effectiveness -- Equal Effectiveness

In [12]:
# Select cumulative rules with the "total-correctness" metric.
# Optional filters (all currently disabled): "remove-contained", "remove-fair-rules",
# "keep-only-min-change".
top_rules, subgroup_costs = facts.select_rules_subset_cumulative(
    rules_with_cumulative_correctness,
    metric="total-correctness",
    sort_strategy="generic-sorting-ignore-forall-subgroups-empty",
    top_count=top_count,
    filter_sequence=[],
    params=params,
)

print_recourse_report_cumulative(
    top_rules,
    subgroup_costs=subgroup_costs,
    show_subgroup_costs=True,
    show_then_costs=True,
    correctness_metric=True,
)

If [1mage = (16.999, 26.0], capital-gain = 0, capital-loss = 0, marital-status =  Never-married, occupation =  Handlers-cleaners, relationship =  Own-child, sex =  Male[0m:
	Protected Subgroup '[1m Non-White[0m', [34m1.03%[39m covered
		Make [1m[31mage = (26.0, 34.0][39m, [31mmarital-status =  Married-civ-spouse[39m, [31moccupation =  Prof-specialty[39m, [31mrelationship =  Married[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 13.0.
		Make [1m[31mage = (26.0, 34.0][39m, [31mmarital-status =  Married-civ-spouse[39m, [31moccupation =  Sales[39m, [31mrelationship =  Married[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 13.0.
		Make [1m[31mage = (26.0, 34.0][39m, [31mmarital-status =  Married-civ-spouse[39m, [31moccupation =  Exec-managerial[39m, [31mrelationship =  Married[39m[0m with effectiveness [32m11.76%[39m and counterfactual cost = 13.0.
		Make [1m[31mage = (34.0, 41.0][39m, [31mmarital-status =  

## Fairness of Recourse (using two-sample Kolmogorov-Smirnov test) -- Fair Effectiveness-Cost Trade-Off

In [13]:
# Same computation as in the project's other notebooks: for each race group in the
# test set, count the rows the model predicts as 0.
preds_Xtest = model.predict(Xtest)
affected_pop_sizes = {
    sg: ((Xtest["race"] == sg) & (preds_Xtest == 0)).sum()
    for sg in Xtest["race"].unique()
}

top_rules, unfairness = facts.select_rules_subset_KStest(
    rules_with_cumulative_correctness,
    affected_pop_sizes,
    top_count=top_count,
)

# The `show_cumulative_plots=True` option is left disabled here.
print_recourse_report_KStest_cumulative(
    top_rules,
    population_sizes=affected_pop_sizes,
    unfairness=unfairness,
    show_then_costs=True,
)

If [1mcapital-gain = 0, capital-loss = 0, marital-status =  Never-married, relationship =  Own-child[0m:
	Protected Subgroup '[1m Non-White[0m', [34m15.43%[39m covered out of 1653
		Make [1m[31mmarital-status =  Married-civ-spouse[39m, [31mrelationship =  Married[39m[0m with effectiveness [32m7.45%[39m and counterfactual cost = 11.0.
		Make [1m[31mcapital-gain = 3103[39m, [31mmarital-status =  Married-civ-spouse[39m, [31mrelationship =  Married[39m[0m with effectiveness [32m25.88%[39m and counterfactual cost = 11.03.
		Make [1m[31mcapital-gain = 4386[39m, [31mmarital-status =  Married-civ-spouse[39m, [31mrelationship =  Married[39m[0m with effectiveness [32m32.94%[39m and counterfactual cost = 11.04.
		Make [1m[31mcapital-gain = 5178[39m, [31mmarital-status =  Married-civ-spouse[39m, [31mrelationship =  Married[39m[0m with effectiveness [32m45.10%[39m and counterfactual cost = 11.05.
		Make [1m[31mcapital-gain = 7298[39m, [31mmarital-stat

## Fairness of Effectiveness at Recourse Budget -- Equal Effectiveness within Budget (Micro)

In [14]:
# Select cumulative rules with the "max-upto-cost" metric, using `cost_budget` as the cost threshold.
# Optional filters (all currently disabled): "remove-contained", "remove-fair-rules",
# "remove-above-thr-cost", "remove-below-thr", "keep-only-min-change".
top_rules, subgroup_costs = facts.select_rules_subset_cumulative(
    rules_with_cumulative_correctness,
    metric="max-upto-cost",
    sort_strategy="generic-sorting-ignore-exists-subgroup-empty",
    top_count=top_count,
    cost_threshold=cost_budget,
    filter_sequence=[],
    params=params,
)

print_recourse_report_cumulative(
    top_rules,
    subgroup_costs=subgroup_costs,
    show_subgroup_costs=True,
    show_then_costs=True,
    correctness_metric=True,
)

If [1mcapital-gain = 0, capital-loss = 0, education-num = 9, marital-status =  Married-civ-spouse, occupation =  Other-service[0m:
	Protected Subgroup '[1m Non-White[0m', [34m2.00%[39m covered
		Make [1m[31moccupation =  Exec-managerial[39m[0m with effectiveness [32m57.58%[39m and counterfactual cost = 1.0.
		Make [1m[31meducation-num = 10[39m, [31moccupation =  Sales[39m[0m with effectiveness [32m57.58%[39m and counterfactual cost = 21.0.
		Make [1m[31meducation-num = 10[39m, [31moccupation =  Exec-managerial[39m[0m with effectiveness [32m66.67%[39m and counterfactual cost = 21.0.
		[1mAggregate cost[0m of the above recourses = [35m0.58[39m
	Protected Subgroup '[1m White[0m', [34m1.00%[39m covered
		Make [1m[31moccupation =  Exec-managerial[39m[0m with effectiveness [32m17.78%[39m and counterfactual cost = 1.0.
		Make [1m[31meducation-num = 10[39m, [31moccupation =  Sales[39m[0m with effectiveness [32m17.78%[39m and counterfactual cost

# Gather all metrics in a table (under construction)

In [15]:
from facts.fairness_metrics_aggr import make_table, auto_budget_calculation

In [16]:
# Sanity check: both rule collections must have exactly the same set of keys.
rules_with_atomic_correctness.keys() == rules_with_cumulative_correctness.keys()

True

In [17]:
# Strip the correctness (and cost) values so that only the coverage and the sorted
# `then` clauses remain, then check that both collections agree on them.
thens1 = {
    ifc: {
        sg: (cov, sorted(t for t, _ in thens))
        for sg, (cov, thens) in thencs.items()
    }
    for ifc, thencs in rules_with_atomic_correctness.items()
}
thens2 = {
    ifc: {
        sg: (cov, sorted(t for t, _, _ in thens))
        for sg, (cov, thens) in thencs.items()
    }
    for ifc, thencs in rules_with_cumulative_correctness.items()
}

thens1 == thens2

True

In [18]:
# Merge the two rule collections: every `then` clause ends up with both its atomic
# and its cumulative correctness; the cost stored with the cumulative rules is dropped.
rules_with_both = {}
for ifc, cumulative_by_sg in rules_with_cumulative_correctness.items():
    merged_by_sg = {}
    for sg, (cov, cumulative_thens) in cumulative_by_sg.items():
        # Lookup table: then-clause -> atomic correctness for this subgroup.
        atomic_cor_of = dict(rules_with_atomic_correctness[ifc][sg][1])
        merged_thens = [
            (then, atomic_cor_of[then], cumcor)
            for then, cumcor, _ in cumulative_thens
        ]
        merged_by_sg[sg] = (cov, merged_thens)
    rules_with_both[ifc] = merged_by_sg

In [19]:
# Derive the cost budgets from the cumulative rules at the given percentiles.
# The correctness threshold comes from `cor_thres` in the configuration cell rather
# than a repeated 0.5 literal, so the two cannot drift apart.
budgets = auto_budget_calculation(
    rules_with_cumulative_correctness,
    cor_thres=cor_thres,
    percentiles=[0.3, 0.6, 0.9],
)
budgets

[1.150241502415024, 10.150241502415025, 21.0]

In [20]:
# Build the table of fairness metrics, one row per subgroup.
# NOTE: this rebinds `df`, which until now held the cleaned Adult dataset. The cell that
# splits `df` into `X` / `y` cannot be re-run after this point without reloading the data.
df = make_table(
    rules_with_both,
    sensitive_attribute_vals=[" Non-White", " White"],
    effectiveness_thresholds=[0.3, 0.7],
    cost_budgets=budgets,
    params=params
)

In [21]:
# Drop the 'weighted-average' and 'mean-cost-cinf' metric columns.
# Reassigning instead of mutating in place, together with errors="ignore", keeps the cell
# safe to re-run: a second execution is a no-op instead of raising KeyError.
df = df.drop(columns=['weighted-average', 'mean-cost-cinf'], errors="ignore")

  df.drop(['weighted-average', 'mean-cost-cinf'], axis=1, inplace=True)


## 2-level index: Examples of usage

In [22]:
# The "subgroup" column holds the textual description of each subgroup.
df["subgroup"]

Unnamed: 0,subgroup
0,capital-loss = 0
1,capital-gain = 0
2,"capital-gain = 0, capital-loss = 0"
3,native-country = United-States
4,"capital-loss = 0, native-country = United-States"
...,...
16636,"Workclass = Private, age = (16.999, 26.0], ca..."
16637,"hours-per-week = PartTime, marital-status = N..."
16638,"Workclass = Private, hours-per-week = PartTim..."
16639,"Workclass = Private, capital-gain = 0, capita..."


In [23]:
# Metric columns are addressed by a 2-level key (metric, protected group). A parameterized
# metric is itself a (name, parameter) tuple, and group labels keep their leading space.
df[('Equal Cost of Effectiveness(Macro)', 0.3), " Non-White"]

0             inf
1        0.072981
2        0.072981
3             inf
4             inf
           ...   
16636         inf
16637         inf
16638         inf
16639         inf
16640         inf
Name: ((Equal Cost of Effectiveness(Macro), 0.3),  Non-White), Length: 16641, dtype: float64

In [24]:
# Per-subgroup difference (" White" minus " Non-White") for the same metric, with NaNs
# dropped, summarized as a frequency table of the distinct difference values.
(df[('Equal Cost of Effectiveness(Macro)', 0.3), " White"] - df[('Equal Cost of Effectiveness(Macro)', 0.3), " Non-White"]).dropna().value_counts()

 0.000000     4555
 inf           877
 2.000000      650
-inf           491
 3.000000      243
 0.150242      124
 10.000000     110
 4.000000       56
 6.000000       39
 8.000000       39
 0.072981       33
 0.077261       27
 0.076881       26
-2.000000       25
 0.073361       20
-0.072981       18
 0.073361       15
 0.031030       14
-3.000000       14
 0.007920       12
 0.433196       11
-0.433196       11
 0.076881       10
 1.000000        9
 7.000000        9
-0.077261        7
-1.000000        7
 0.029120        6
 5.000000        6
-6.000000        6
-0.150242        6
 0.021200        6
-0.021200        5
-8.000000        4
 4.076881        4
 0.076881        4
-0.072981        4
 0.433196        3
-0.003900        3
 0.020750        3
-10.000000       3
 0.003900        3
-0.073361        3
 4.150242        2
 0.076881        2
 0.072981        2
 0.007920        2
 0.029120        2
-0.003900        2
-4.000000        2
-2.150242        1
-0.031030        1
-0.003900   

## 3- Get ranking of subgroups based on metrics

In [25]:
# Table of differences between " White" and " Non-White" for each metric
# (with_abs=True — presumably absolute differences; confirm in get_diff_table).
diff = get_diff_table(df, sensitive_attribute_vals=[" White", " Non-White"], with_abs = True)
diff = diff.set_index('subgroup')
# Drop the trade-off 'bias' column, turn zero differences into NaN, then dense-rank every
# metric column in descending order (the largest difference gets rank 1).
ranked = diff.drop(columns=[('Fair Effectiveness-Cost Trade-Off','bias')]).mask(diff == 0).rank(ascending = False,axis=0,method='dense')
# NaN entries (including the masked zero differences) are labelled "Fair".
ranked = ranked.replace(np.nan,"Fair")

  z = z.drop(columns=["Fair Effectiveness-Cost Trade-Off"])


In [26]:
ranked

Unnamed: 0_level_0,"(Equal Cost of Effectiveness(Macro), 0.3)","(Equal Cost of Effectiveness(Macro), 0.7)","(Equal Choice for Recourse, 0.3)","(Equal Choice for Recourse, 0.7)",Equal Effectiveness,"(Equal Effectiveness within Budget, 1.150241502415024)","(Equal Effectiveness within Budget, 10.150241502415025)","(Equal Effectiveness within Budget, 21.0)","(Equal Cost of Effectiveness(Micro), 0.3)","(Equal Cost of Effectiveness(Micro), 0.7)",Equal(Conditional Mean Recourse),"(Fair Effectiveness-Cost Trade-Off, value)"
subgroup,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
capital-loss = 0,Fair,Fair,Fair,Fair,4443.0,12.0,3009.0,4221.0,Fair,Fair,7890.0,6536.0
capital-gain = 0,30.0,Fair,14.0,Fair,Fair,Fair,Fair,Fair,31.0,Fair,6430.0,6216.0
"capital-gain = 0, capital-loss = 0",30.0,Fair,14.0,Fair,Fair,Fair,Fair,Fair,31.0,Fair,6370.0,6200.0
native-country = United-States,Fair,Fair,Fair,Fair,Fair,Fair,Fair,Fair,Fair,Fair,Fair,Fair
"capital-loss = 0, native-country = United-States",Fair,Fair,Fair,Fair,3827.0,10.0,2495.0,3604.0,Fair,Fair,7921.0,5693.0
...,...,...,...,...,...,...,...,...,...,...,...,...
"Workclass = Private, age = (16.999, 26.0], capital-gain = 0, hours-per-week = PartTime, marital-status = Never-married, native-country = United-States, occupation = Other-service",Fair,Fair,Fair,Fair,7591.0,Fair,Fair,Fair,Fair,Fair,3169.0,10155.0
"hours-per-week = PartTime, marital-status = Never-married, occupation = Other-service, sex = Male",Fair,Fair,Fair,Fair,3188.0,Fair,Fair,2959.0,Fair,Fair,Fair,4852.0
"Workclass = Private, hours-per-week = PartTime, native-country = United-States, occupation = Other-service, sex = Male",Fair,Fair,Fair,Fair,2238.0,Fair,1177.0,1980.0,Fair,Fair,4750.0,3290.0
"Workclass = Private, capital-gain = 0, capital-loss = 0, hours-per-week = PartTime, occupation = Other-service, sex = Male",Fair,Fair,Fair,Fair,4550.0,Fair,2662.0,4330.0,Fair,Fair,3165.0,5981.0


In [27]:
# Metrics passed to get_comb_df as `rev_bias_metrics`.
rev_bias_metrics = ['Equal Effectiveness', 'Equal Effectiveness within Budget']
comb_df = get_comb_df(df, ranked, diff, rev_bias_metrics, sensitive_attribute_vals=[" White", " Non-White"])
# NOTE(review): the group labels below lack the leading space used everywhere else
# (" White", " Non-White") — confirm that get_analysis_dfs expects the stripped form.
analysis_df, rank_analysis_df = get_analysis_dfs(comb_df, ranked, sensitive_attribute_vals=["White", "Non-White"])

  z = z.drop(columns=["Fair Effectiveness-Cost Trade-Off"])
  data_df = data_df.append(total_row)


In [28]:
# Preview: for every metric, each subgroup's rank, score and the group the bias is against.
comb_df.head()

Unnamed: 0_level_0,"(Equal Cost of Effectiveness(Macro), 0.3)","(Equal Cost of Effectiveness(Macro), 0.3)","(Equal Cost of Effectiveness(Macro), 0.3)","(Equal Cost of Effectiveness(Macro), 0.7)","(Equal Cost of Effectiveness(Macro), 0.7)","(Equal Cost of Effectiveness(Macro), 0.7)","(Equal Choice for Recourse, 0.3)","(Equal Choice for Recourse, 0.3)","(Equal Choice for Recourse, 0.3)","(Equal Choice for Recourse, 0.7)",...,"(Equal Cost of Effectiveness(Micro), 0.3)","(Equal Cost of Effectiveness(Micro), 0.7)","(Equal Cost of Effectiveness(Micro), 0.7)","(Equal Cost of Effectiveness(Micro), 0.7)",Equal(Conditional Mean Recourse),Equal(Conditional Mean Recourse),Equal(Conditional Mean Recourse),Fair Effectiveness-Cost Trade-Off,Fair Effectiveness-Cost Trade-Off,Fair Effectiveness-Cost Trade-Off
Unnamed: 0_level_1,rank,score,bias against,rank,score,bias against,rank,score,bias against,rank,...,bias against,rank,score,bias against,rank,score,bias against,rank,score,bias against
subgroup,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
capital-loss = 0,Fair,0.0,Fair,Fair,0.0,Fair,Fair,0.0,Fair,Fair,...,Fair,Fair,0.0,Fair,7890.0,0.00028,White,6536.0,0.07892,Non-White
capital-gain = 0,30.0,0.0212,Non-White,Fair,0.0,Fair,14.0,1.0,Non-White,Fair,...,Non-White,Fair,0.0,Fair,6430.0,0.018996,White,6216.0,0.0838,Non-White
"capital-gain = 0, capital-loss = 0",30.0,0.0212,Non-White,Fair,0.0,Fair,14.0,1.0,Non-White,Fair,...,Non-White,Fair,0.0,Fair,6370.0,0.020344,White,6200.0,0.084078,Non-White
native-country = United-States,Fair,0.0,Fair,Fair,0.0,Fair,Fair,0.0,Fair,Fair,...,Fair,Fair,0.0,Fair,Fair,0.0,Fair,Fair,0.0,Non-White
"capital-loss = 0, native-country = United-States",Fair,0.0,Fair,Fair,0.0,Fair,Fair,0.0,Fair,Fair,...,Fair,Fair,0.0,Fair,7921.0,0.000109,White,5693.0,0.091356,Non-White


In [29]:
# Per metric: the "Rank = 1" count and the "bias against" count for each group.
analysis_df

Unnamed: 0,Rank = 1 Count,White bias against Count,Non-White bias against Count
"(Equal Cost of Effectiveness(Macro), 0.3)",1368,2372,616
"(Equal Cost of Effectiveness(Micro), 0.3)",1368,2377,616
"(Equal Cost of Effectiveness(Macro), 0.7)",546,1001,51
"(Equal Cost of Effectiveness(Micro), 0.7)",546,1001,51
Equal(Conditional Mean Recourse),1274,8248,5231
"(Equal Effectiveness within Budget, 1.150241502415024)",1,142,59
"(Equal Effectiveness within Budget, 10.150241502415025)",1,7229,5189
"(Equal Effectiveness within Budget, 21.0)",1,8724,5757
"(Equal Choice for Recourse, 0.7)",1,1117,81
"(Equal Choice for Recourse, 0.3)",1,3554,773


In [30]:
# Metric-by-metric rank comparison table (see get_analysis_dfs for the exact definition).
rank_analysis_df

Unnamed: 0,Fair Effectiveness-Cost Trade-Off,"(Equal Effectiveness within Budget, 21.0)","(Equal Choice for Recourse, 0.7)","(Equal Cost of Effectiveness(Micro), 0.7)","(Equal Cost of Effectiveness(Macro), 0.7)","(Equal Effectiveness within Budget, 1.150241502415024)","(Equal Effectiveness within Budget, 10.150241502415025)","(Equal Choice for Recourse, 0.3)","(Equal Cost of Effectiveness(Micro), 0.3)","(Equal Cost of Effectiveness(Macro), 0.3)",Equal(Conditional Mean Recourse),Equal Effectiveness
Fair Effectiveness-Cost Trade-Off,1.0,12311.0,10.0,1.0,1.0,182.0,10535.0,12.0,1.0,1.0,7972.0,1.0
"(Equal Effectiveness within Budget, 21.0)",40.0,1.0,11.0,24.0,24.0,182.0,1.0,14.0,1.0,1.0,1.0,27.0
"(Equal Choice for Recourse, 0.7)",808.0,237.0,1.0,3.0,3.0,182.0,924.0,15.0,41.0,40.0,5238.0,399.0
"(Equal Cost of Effectiveness(Micro), 0.7)",1768.8,2400.4,9.0,1.0,1.0,180.0,2586.2,12.5,27.7,26.7,4056.1,2161.4
"(Equal Cost of Effectiveness(Macro), 0.7)",1768.8,2400.4,9.0,1.0,1.0,180.0,2586.2,12.5,27.7,26.7,4056.1,2161.4
"(Equal Effectiveness within Budget, 1.150241502415024)",3464.0,1841.0,10.0,1.0,1.0,1.0,1273.0,15.0,41.0,40.0,7972.0,2100.0
"(Equal Effectiveness within Budget, 10.150241502415025)",40.0,1.0,11.0,24.0,24.0,182.0,1.0,14.0,1.0,1.0,1.0,27.0
"(Equal Choice for Recourse, 0.3)",227.0,621.0,10.0,1.0,1.0,182.0,15.0,1.0,10.0,9.0,733.0,861.0
"(Equal Cost of Effectiveness(Micro), 0.3)",2964.2,3237.7,11.0,23.9,23.9,181.2,4421.6,13.0,1.0,1.0,4129.4,2214.3
"(Equal Cost of Effectiveness(Macro), 0.3)",2964.2,3237.7,11.0,23.9,23.9,181.2,4421.6,13.0,1.0,1.0,4129.4,2214.3


In [31]:
#top_rank = ranked[(ranked[('mincost-above-th', 0.7)]==1) | (ranked[('number-above-th', 0.7)]==1) |  (ranked['total-effectiveness']==1) | (ranked['total-effectiveness']==1) | (ranked[('eff-within-budget', 10.0)]==1) | (ranked[('eff-within-budget', 18.0)]==1) | (ranked[('eff-within-budget', 5.076880768807688)]==1) | (ranked[('cost-of-effectiveness', 0.7)]==1) | (ranked[('cost-of-effectiveness', 0.3)]==1) | (ranked[('KStest', 'value')]==1)]

In [32]:
# ranked_indexed = top_rank.set_index([top_rank.index, "subgroup"])
# ranked_fair = ranked_indexed == 'Fair'
# ranked_more_than_2000 = ranked_indexed.mask(ranked_indexed=='Fair') >= 1000
# new_rank = ranked_indexed[ranked_fair.apply(lambda row: row.sum() >=4 , axis=1) | ranked_more_than_2000.apply(lambda row : row.sum() >= 6,axis=1)]
# new_rank.head()

In [33]:
# sgs = [
#     ranked.iloc[9841].subgroup,
# ]

# rules_sgs_atomic = {ifc: rules_with_atomic_correctness[ifc] for ifc in sgs}
# rules_sgs_cumulative = {ifc: rules_with_cumulative_correctness[ifc] for ifc in sgs}

In [34]:
# df.iloc[4228].to_frame().T

In [35]:
# top_rules, subgroup_costs = facts.select_rules_subset(
#     rules_sgs_atomic,
#     metric="min-above-thr",
#     sort_strategy="generic-sorting-ignore-exists-subgroup-empty",
#     cor_threshold=0.7,
#     filter_sequence = [
#         # "remove-contained",
#         "remove-below-thr",        
#         #"remove-fair-rules", 
#         "keep-only-min-change"
#     ],
#     params=params
# )

# print_recourse_report(
#     top_rules,
#     subgroup_costs=subgroup_costs,
#     show_subgroup_costs=True
# )

In [36]:
# top_rules, subgroup_costs = facts.select_rules_subset_cumulative(
#     rules_sgs_cumulative,
#     metric="max-upto-cost",
#     sort_strategy="generic-sorting-ignore-exists-subgroup-empty",
#     top_count=top_count,
#     cost_threshold = 10,
#     filter_sequence = [
#         # "remove-contained",
#         # "remove-fair-rules",
#         "keep-cheap-rules-above-thr-cor",
#         # "keep-only-min-change",
        
#     ],
#     params=params
# )

# print_recourse_report_cumulative(
#     top_rules,
#     subgroup_costs=subgroup_costs,
#     show_subgroup_costs=True,
#     show_then_costs=True,
#     correctness_metric=True
# )