Datasets used: 
- `adult (Adult)` :<br>The UCI Adult dataset contains 14 features, which can be divided into two categories: demographic and income-related.<br> The demographic features include:

    age: continuous.<br>
    fnlwgt : continuous, represents final weight, which is the number of units in the target population that the responding unit represents.<br>
    workclass: categorical, with values 'Private', 'Local-gov', 'Self-emp-not-inc', 'Federal-gov', 'State-gov', 'Self-emp-inc', 'Without-pay'.<br>
    education: categorical, with values '11th', 'HS-grad', 'Assoc-acdm', 'Some-college', '10th', ..., '9th', 'Doctorate', '12th', '1st-4th', 'Preschool'.<br>
    education-num: continuous, with values 1 to 16; one number is assigned to each label of the education feature.<br>
    marital-status: categorical, with values 'Never-married', 'Married-civ-spouse', 'Widowed', 'Separated', 'Divorced', 'Married-spouse-absent', 'Married-AF-spouse'.<br>
    occupation: categorical, with values such as 'Machine-op-inspct', 'Farming-fishing', 'Protective-serv'.<br>
    relationship: categorical, with values 'Own-child', 'Husband', 'Not-in-family', 'Unmarried', 'Wife', 'Other-relative'.<br>
    race: categorical, with values such as white, black, and Asian.<br>
    sex: categorical, with values male and female.<br>
    
    The income-related features include:<br>

    hours-per-week: continuous.<br>
    native-country: categorical, with values such as United-States, Mexico, and Germany.<br>
    capital-gain: continuous, represents the amount of money an individual has gained from the sale of investments such as stocks, bonds, or real estate.<br>
    capital-loss: continuous, represents the amount of money an individual has lost from the sale of investments such as stocks, bonds, or real estate.<br>
    The target feature is the income, which is binary:<br>
    income: categorical, with values less than or equal to 50K and greater than 50K.<br>

Reference Links: https://archive.ics.uci.edu/ml/datasets/adult

In [1]:
import pandas as pd
import numpy as np

import os
import sys
# Append the parent of the current working directory to sys.path (only once)
# so that the `facts` import further down can be resolved -- presumably the
# package lives one level above this notebook; confirm against the repo layout.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
    
import warnings
# NOTE(review): this silences every warning for the whole kernel session,
# including deprecation warnings that may be worth seeing.
warnings.filterwarnings('ignore')
import facts
from facts.clean import clean_dataset
from facts import valid_ifthens_with_coverage_correctness, rules2rulesbyif
from facts.models import customLogisticRegression
from facts.parameters import ParameterProxy
from facts.formatting import recourse_report_reverse, print_recourse_report, print_recourse_report_cumulative, print_recourse_report_KStest_cumulative
from facts.utils import load_rules_by_if
from facts.fairness_metrics_aggr import get_diff_table, get_comb_df, get_analysis_dfs

In [2]:
# Knobs shared by the metric cells further down.
top_count = 20    # forwarded as `top_count` to every rule-selection call
cost_budget = 10  # forwarded as `cost_threshold` to the "max-upto-cost" metric
c_inf = 5         # NOTE(review): not referenced by any cell visible in this notebook

# Dataset

In [3]:
# Raw Adult CSV, fetched over the network from the FairTest repository.
urlfile = "https://raw.githubusercontent.com/columbia/fairtest/master/data/adult/adult.csv"
X = pd.read_csv(urlfile)

# Project-specific preprocessing for the "adult" dataset.
df = clean_dataset(X, "adult")

# Ascending left edges of the distinct age bins (each unique value exposes `.left`).
age = sorted(val.left for val in df.age.unique())

df.head()

Unnamed: 0,age,Workclass,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,"(34.0, 41.0]",State-gov,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,FullTime,United-States,0
1,"(41.0, 50.0]",Self-emp-not-inc,13,Married-civ-spouse,Exec-managerial,Married,White,Male,0,0,PartTime,United-States,0
2,"(34.0, 41.0]",Private,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,FullTime,United-States,0
3,"(50.0, 90.0]",Private,7,Married-civ-spouse,Handlers-cleaners,Married,Black,Male,0,0,FullTime,United-States,0
4,"(26.0, 34.0]",Private,13,Married-civ-spouse,Prof-specialty,Married,Black,Female,0,0,FullTime,Cuba,0


In [4]:
# Separate the target column from the predictors.
y = df['income']
X = df.drop(columns='income')

# Column-name lists by dtype. `select_dtypes(include='number')` is the public
# replacement for the private `DataFrame._get_numeric_data()` helper, which is
# not part of the supported pandas API; notably, unlike that helper, it does
# not count boolean columns as numeric.
num_features = X.select_dtypes(include='number').columns.to_list()
cate_features = X.select_dtypes(include=['object', 'category']).columns.to_list()
# Passed as `ord_cols` when the feature-change comparators are built.
ord_features = ['hours-per-week']

# Rules

In [5]:
# Load the precomputed rule sets from files in the current working directory
# (the file names suggest the race-protected experiment with seed 131313).
# The first file also yields `Xtest` and `model`, which a later cell uses via
# `model.predict(Xtest)`.
# NOTE(review): the first call is unpacked into three values while the second
# is bound to a single name -- presumably load_rules_by_if returns whatever
# object was serialized into each file; confirm both files have that layout.
rules_with_atomic_correctness, Xtest, model = load_rules_by_if("rules-Xtest-model-2_race (seed 131313).data")
rules_with_cumulative_correctness = load_rules_by_if("rulesAdultFairTest_race_cumulative (seed 131313).data")

In [6]:
# Per-feature weights handed to the feature-change builder.
feature_weights = {
    "race": 100,
    "sex": 100,
    "marital-status": 5,
    "relationship": 5,
    "age": 10,
    "occupation": 4,
    "Workclass": 2,
    "native-country": 4,
    "hours-per-week": 2,
    "capital-gain": 1,
    "capital-loss": 1,
    "education-num": 3,
}

# Categorical columns are passed as `cate_cols`, numeric ones as `num_cols`.
features_with_binary_cost = cate_features
features_with_proportional_cost = num_features

comparators = facts.feature_change_builder(
    X,
    num_cols=features_with_proportional_cost,
    cate_cols=features_with_binary_cost,
    ord_cols=ord_features,
    feature_weights=feature_weights,
    num_normalization=True,
    feats_to_normalize=["capital-gain", "capital-loss"],
)

# Wrap the comparators in the parameter object every metric cell receives.
params = ParameterProxy(featureChanges=comparators)

# Macro Viewpoint

## Equal Cost of Effectiveness (Macro)

In [7]:
# Select from the atomic-correctness rules with the "min-above-thr" metric at
# a 0.7 correctness threshold, then print the corresponding recourse report.
top_rules, subgroup_costs = facts.select_rules_subset(
    rules_with_atomic_correctness,
    metric="min-above-thr",
    cor_threshold=0.7,
    top_count=top_count,
    sort_strategy="generic-sorting-ignore-exists-subgroup-empty",
    filter_sequence=[
        "remove-contained",
        "remove-below-thr",
        "remove-fair-rules",
        "keep-only-min-change",
    ],
    params=params,
)

print_recourse_report(
    top_rules,
    subgroup_costs=subgroup_costs,
    show_subgroup_costs=True,
    metric_name="Equal Cost of Effectiveness (Macro) (threshold = 0.7)",
)

If [1mage = (26.0, 34.0], marital-status =  Never-married, occupation =  Prof-specialty, relationship =  Not-in-family[0m:
	Protected Subgroup '[1m Non-White[0m', [34m1.15%[39m covered
		Make [1m[31mage = (41.0, 50.0][39m, [31mmarital-status =  Married-civ-spouse[39m, [31mrelationship =  Married[39m[0m with effectiveness [32m80.00%[39m.
		Make [1m[31mage = (34.0, 41.0][39m, [31mmarital-status =  Married-civ-spouse[39m, [31mrelationship =  Married[39m[0m with effectiveness [32m80.00%[39m.
		[1mAggregate cost[0m of the above recourses = [35m20.00[39m
	Protected Subgroup '[1m White[0m', [34m1.18%[39m covered
		Make [1m[31mmarital-status =  Married-civ-spouse[39m, [31mrelationship =  Married[39m[0m with effectiveness [32m75.47%[39m.
		[1mAggregate cost[0m of the above recourses = [35m10.00[39m
	[35mBias against  Non-White due to Equal Cost of Effectiveness (Macro) (threshold = 0.7). Unfairness score = 10.[39m
If [1mage = (26.0, 34.0], educat

## Equal Choice for Recourse

In [8]:
# Select from the atomic-correctness rules with the "num-above-thr" metric at
# a 0.7 correctness threshold, then print the corresponding recourse report.
top_rules, subgroup_costs = facts.select_rules_subset(
    rules_with_atomic_correctness,
    metric="num-above-thr",
    cor_threshold=0.7,
    top_count=top_count,
    sort_strategy="generic-sorting-ignore-forall-subgroups-empty",
    filter_sequence=[
        "remove-contained",
        "remove-below-thr",
        "remove-fair-rules",
    ],
    params=params,
)

print_recourse_report(
    top_rules,
    subgroup_costs=subgroup_costs,
    show_subgroup_costs=True,
    metric_name="Equal Choice for Recourse(Macro) (threshold = 0.7)",
)

If [1mage = (26.0, 34.0], marital-status =  Never-married, occupation =  Prof-specialty, relationship =  Not-in-family[0m:
	Protected Subgroup '[1m Non-White[0m', [34m1.15%[39m covered
		Make [1m[31mage = (41.0, 50.0][39m, [31mmarital-status =  Married-civ-spouse[39m, [31moccupation =  Exec-managerial[39m, [31mrelationship =  Married[39m[0m with effectiveness [32m90.00%[39m.
		Make [1m[31mage = (41.0, 50.0][39m, [31mmarital-status =  Married-civ-spouse[39m, [31mrelationship =  Married[39m[0m with effectiveness [32m80.00%[39m.
		Make [1m[31mage = (34.0, 41.0][39m, [31mmarital-status =  Married-civ-spouse[39m, [31moccupation =  Exec-managerial[39m, [31mrelationship =  Married[39m[0m with effectiveness [32m80.00%[39m.
		Make [1m[31mage = (34.0, 41.0][39m, [31mmarital-status =  Married-civ-spouse[39m, [31mrelationship =  Married[39m[0m with effectiveness [32m80.00%[39m.
		Make [1m[31mage = (41.0, 50.0][39m, [31mmarital-status =  Married

# New Metrics

## Equal Conditional Mean Recourse

In [9]:
# Select from the cumulative-correctness rules with the
# "fairness-of-mean-recourse-conditional" metric, then print the cumulative
# report including the cost of each suggested action.
top_rules, subgroup_costs = facts.select_rules_subset_cumulative(
    rules_with_cumulative_correctness,
    metric="fairness-of-mean-recourse-conditional",
    top_count=top_count,
    sort_strategy="generic-sorting-ignore-exists-subgroup-empty",
    filter_sequence=[
        "remove-contained",
        "remove-fair-rules",
    ],
    params=params,
)

print_recourse_report_cumulative(
    top_rules,
    subgroup_costs=subgroup_costs,
    show_subgroup_costs=True,
    show_then_costs=True,
    metric_name="Equal Conditional Mean Recourse",
)

If [1mWorkclass =  Private, capital-loss = 0, education-num = 10, hours-per-week = FullTime, marital-status =  Never-married, sex =  Male[0m:
	Protected Subgroup '[1m Non-White[0m', [34m2.35%[39m covered
		Make [1m[31mmarital-status =  Married-civ-spouse[39m[0m with effectiveness [32m2.44%[39m and counterfactual cost = 1.0.
		Make [1m[31mhours-per-week = OverTime[39m, [31mmarital-status =  Married-civ-spouse[39m[0m with effectiveness [32m2.44%[39m and counterfactual cost = 2.0.
		Make [1m[31mhours-per-week = BrainDrain[39m, [31mmarital-status =  Married-civ-spouse[39m[0m with effectiveness [32m2.44%[39m and counterfactual cost = 3.0.
		Make [1m[31meducation-num = 11[39m, [31mmarital-status =  Married-civ-spouse[39m[0m with effectiveness [32m2.44%[39m and counterfactual cost = 21.0.
		Make [1m[31meducation-num = 11[39m, [31mhours-per-week = OverTime[39m, [31mmarital-status =  Married-civ-spouse[39m[0m with effectiveness [32m2.44%[39m and cou

## Equal Cost of Effectiveness (Micro)

In [10]:
# Select from the cumulative-correctness rules with the "min-above-corr"
# metric at a 0.7 correctness threshold, then print the cumulative report
# including the cost of each suggested action.
top_rules, subgroup_costs = facts.select_rules_subset_cumulative(
    rules_with_cumulative_correctness,
    metric="min-above-corr",
    cor_threshold=0.7,
    top_count=top_count,
    sort_strategy="generic-sorting-ignore-exists-subgroup-empty",
    filter_sequence=[
        "remove-contained",
        "remove-fair-rules",
        "keep-cheap-rules-above-thr-cor",
    ],
    params=params,
)

print_recourse_report_cumulative(
    top_rules,
    subgroup_costs=subgroup_costs,
    show_subgroup_costs=True,
    show_then_costs=True,
    metric_name="Equal Cost of Effectiveness (Micro) (threshold = 0.7)",
)

If [1meducation-num = 9, marital-status =  Married-civ-spouse, occupation =  Transport-moving, sex =  Male[0m:
	Protected Subgroup '[1m Non-White[0m', [34m1.78%[39m covered
		Make [1m[31moccupation =  Craft-repair[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31moccupation =  Sales[39m[0m with effectiveness [32m6.45%[39m and counterfactual cost = 1.0.
		Make [1m[31moccupation =  Exec-managerial[39m[0m with effectiveness [32m29.03%[39m and counterfactual cost = 1.0.
		Make [1m[31meducation-num = 10[39m, [31moccupation =  Craft-repair[39m[0m with effectiveness [32m29.03%[39m and counterfactual cost = 21.0.
		Make [1m[31meducation-num = 10[39m, [31moccupation =  Sales[39m[0m with effectiveness [32m29.03%[39m and counterfactual cost = 21.0.
		Make [1m[31meducation-num = 10[39m, [31moccupation =  Prof-specialty[39m[0m with effectiveness [32m29.03%[39m and counterfactual cost = 21.0.
		Make [1m[31meducation

## Equal Effectiveness

In [12]:
# Select from the cumulative-correctness rules with the "total-correctness"
# metric, then print the cumulative report with `correctness_metric=True`.
top_rules, subgroup_costs = facts.select_rules_subset_cumulative(
    rules_with_cumulative_correctness,
    metric="total-correctness",
    top_count=top_count,
    sort_strategy="generic-sorting-ignore-forall-subgroups-empty",
    filter_sequence=[
        "remove-contained",
        "remove-fair-rules",
    ],
    params=params,
)

print_recourse_report_cumulative(
    top_rules,
    subgroup_costs=subgroup_costs,
    show_subgroup_costs=True,
    show_then_costs=True,
    correctness_metric=True,
    metric_name="Equal Effectiveness",
)

If [1mage = (34.0, 41.0], capital-loss = 0, native-country =  United-States, occupation =  Machine-op-inspct[0m:
	Protected Subgroup '[1m Non-White[0m', [34m1.38%[39m covered
		Make [1m[31moccupation =  Exec-managerial[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31moccupation =  Prof-specialty[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31moccupation =  Sales[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31moccupation =  Craft-repair[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31moccupation =  Adm-clerical[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31moccupation =  Tech-support[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31mage = (41.0, 50.0][39m, [31moccupation =  Exec-managerial[39m[0m with effectiveness [32m0.00

## Fair Effectiveness-Cost Trade-Off

In [14]:
# For every value of the "race" column in Xtest, count the rows that the model
# predicts as class 0 (presumably the unfavourable outcome, income <= 50K --
# confirm against clean_dataset). These counts are passed to both the
# selection call and the report as the per-group population sizes.
preds_Xtest = model.predict(Xtest)
affected_pop_sizes = {sg: ((Xtest["race"] == sg) & (preds_Xtest == 0)).sum() for sg in Xtest["race"].unique()}

# Unlike the other metric cells, this selector returns an `unfairness` object
# (second element) instead of subgroup costs; it is forwarded to the report.
top_rules,unfairness = facts.select_rules_subset_KStest(
    rules_with_cumulative_correctness,
    affected_pop_sizes,
    top_count=top_count
)

print_recourse_report_KStest_cumulative(
    top_rules,
    population_sizes=affected_pop_sizes,
    unfairness = unfairness,
    show_then_costs=True,
    metric_name = 'Fair Effectiveness-Cost Trade-Off'
    # Optional keyword, currently disabled: show_cumulative_plots=True
)


If [1mcapital-gain = 0, native-country =  United-States[0m:
	Protected Subgroup '[1m Non-White[0m', [34m76.82%[39m covered out of 1743
		Make [1m[31mcapital-gain = 3103[39m[0m with effectiveness [32m9.19%[39m and counterfactual cost = 0.03.
		Make [1m[31mcapital-gain = 4386[39m[0m with effectiveness [32m12.99%[39m and counterfactual cost = 0.04.
		Make [1m[31mcapital-gain = 5178[39m[0m with effectiveness [32m17.77%[39m and counterfactual cost = 0.05.
		Make [1m[31mcapital-gain = 7298[39m[0m with effectiveness [32m27.71%[39m and counterfactual cost = 0.07.
		Make [1m[31mcapital-gain = 7688[39m[0m with effectiveness [32m29.87%[39m and counterfactual cost = 0.08.
		Make [1m[31mcapital-gain = 15024[39m[0m with effectiveness [32m74.46%[39m and counterfactual cost = 0.15.
		Make [1m[31mcapital-gain = 99999[39m[0m with effectiveness [32m100.00%[39m and counterfactual cost = 1.0.
	Protected Subgroup '[1m White[0m', [34m89.37%[39m covered out 

## Equal Effectiveness within Budget (Micro)

In [15]:
# Select from the cumulative-correctness rules with the "max-upto-cost"
# metric, using `cost_budget` as the cost threshold, then print the
# cumulative report with `correctness_metric=True`.
top_rules, subgroup_costs = facts.select_rules_subset_cumulative(
    rules_with_cumulative_correctness,
    metric="max-upto-cost",
    cost_threshold=cost_budget,
    top_count=top_count,
    sort_strategy="generic-sorting-ignore-exists-subgroup-empty",
    filter_sequence=[
        "remove-contained",
        "remove-fair-rules",
    ],
    params=params,
)

print_recourse_report_cumulative(
    top_rules,
    subgroup_costs=subgroup_costs,
    show_subgroup_costs=True,
    show_then_costs=True,
    correctness_metric=True,
    metric_name="Equal Effectiveness within Budget (Micro)",
)

If [1mage = (34.0, 41.0], capital-loss = 0, native-country =  United-States, occupation =  Machine-op-inspct[0m:
	Protected Subgroup '[1m Non-White[0m', [34m1.38%[39m covered
		Make [1m[31moccupation =  Exec-managerial[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31moccupation =  Prof-specialty[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31moccupation =  Sales[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31moccupation =  Craft-repair[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31moccupation =  Adm-clerical[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31moccupation =  Tech-support[39m[0m with effectiveness [32m0.00%[39m and counterfactual cost = 1.0.
		Make [1m[31mage = (41.0, 50.0][39m, [31moccupation =  Exec-managerial[39m[0m with effectiveness [32m0.00

## Get ranking of subgroups based on metrics

In [16]:
from facts.fairness_metrics_aggr import make_table, auto_budget_calculation

In [17]:
# Merge the two rule sets: for every antecedent and protected subgroup keep the
# coverage from the cumulative rules and turn each cumulative entry
# (then, cumulative correctness, cost) into
# (then, atomic correctness, cumulative correctness).
rules_with_both = {}
for ifc, cumulative_by_sg in rules_with_cumulative_correctness.items():
    merged_by_sg = {}
    for sg, (cov, thens_cum) in cumulative_by_sg.items():
        # Element [1] of the atomic entry holds (then, atomic correctness) pairs.
        atomic_cor_of = dict(rules_with_atomic_correctness[ifc][sg][1])
        merged_by_sg[sg] = (
            cov,
            [(then, atomic_cor_of[then], cumcor) for then, cumcor, _cost in thens_cum],
        )
    rules_with_both[ifc] = merged_by_sg

In [18]:
# Cost budgets computed by the project helper from the cumulative rules.
budgets = auto_budget_calculation(
    rules_with_cumulative_correctness,
    cor_thres=0.5,
    percentiles=[0.3, 0.6, 0.9],
)

# Metrics table for every subgroup, without the two columns that are not
# analysed below. NOTE: this rebinds `df`, which held the cleaned dataset.
df = make_table(
    rules_with_both,
    sensitive_attribute_vals=[" Non-White", " White"],
    effectiveness_thresholds=[0.3, 0.7],
    cost_budgets=budgets,
    params=params,
).drop(columns=['weighted-average', 'mean-cost-cinf'])

In [19]:
# Table of differences between the White and Non-White groups for each metric
# (presumably absolute differences, given with_abs=True), indexed by subgroup.
diff = get_diff_table(df, sensitive_attribute_vals=[" White", " Non-White"], with_abs = True)
diff = diff.set_index('subgroup')
# Rank the subgroups within each metric column: zero differences are masked to
# NaN first, then a dense descending rank is taken, so rank 1 is the largest
# difference. The ('Fair Effectiveness-Cost Trade-Off', 'bias') column is
# excluded from the ranking.
ranked = diff.drop(columns=[('Fair Effectiveness-Cost Trade-Off','bias')]).mask(diff == 0).rank(ascending = False,axis=0,method='dense')
# Entries left as NaN (the masked zero differences) are labelled "Fair".
ranked = ranked.replace(np.nan,"Fair")

In [20]:
# Metrics handed to get_comb_df as `rev_bias_metrics` -- presumably the ones
# where a higher value is better, so the bias direction is reversed; confirm
# against get_comb_df.
rev_bias_metrics = ['Equal Effectiveness', 'Equal Effectiveness within Budget']
comb_df = get_comb_df(df, ranked, diff, rev_bias_metrics, sensitive_attribute_vals=[" White", " Non-White"])
# NOTE(review): the group labels here have no leading space, unlike every other
# call in this notebook (" White", " Non-White"). Presumably get_comb_df emits
# stripped labels in its "bias against" columns -- confirm, otherwise the
# per-group counts in analysis_df would not match anything.
analysis_df,rank_analysis_df = get_analysis_dfs(comb_df,ranked, sensitive_attribute_vals=["White", "Non-White"])

In [21]:
# Preview the first rows of the combined table (rank / score / bias against per metric).
comb_df.head()

Unnamed: 0_level_0,"(Equal Cost of Effectiveness(Macro), 0.3)","(Equal Cost of Effectiveness(Macro), 0.3)","(Equal Cost of Effectiveness(Macro), 0.3)","(Equal Cost of Effectiveness(Macro), 0.7)","(Equal Cost of Effectiveness(Macro), 0.7)","(Equal Cost of Effectiveness(Macro), 0.7)","(Equal Choice for Recourse, 0.3)","(Equal Choice for Recourse, 0.3)","(Equal Choice for Recourse, 0.3)","(Equal Choice for Recourse, 0.7)",...,"(Equal Cost of Effectiveness(Micro), 0.3)","(Equal Cost of Effectiveness(Micro), 0.7)","(Equal Cost of Effectiveness(Micro), 0.7)","(Equal Cost of Effectiveness(Micro), 0.7)",Equal(Conditional Mean Recourse),Equal(Conditional Mean Recourse),Equal(Conditional Mean Recourse),Fair Effectiveness-Cost Trade-Off,Fair Effectiveness-Cost Trade-Off,Fair Effectiveness-Cost Trade-Off
Unnamed: 0_level_1,rank,score,bias against,rank,score,bias against,rank,score,bias against,rank,...,bias against,rank,score,bias against,rank,score,bias against,rank,score,bias against
subgroup,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
capital-loss = 0,Fair,0.0,Fair,Fair,0.0,Fair,Fair,0.0,Fair,Fair,...,Fair,Fair,0.0,Fair,7497.0,0.000156,Non-White,4615.0,0.114881,Non-White
capital-gain = 0,32.0,0.0251,Non-White,Fair,0.0,Fair,9.0,2.0,Non-White,Fair,...,Non-White,Fair,0.0,Fair,5091.0,0.063698,Non-White,3307.0,0.143342,Non-White
"capital-gain = 0, capital-loss = 0",32.0,0.0251,Non-White,Fair,0.0,Fair,9.0,2.0,Non-White,Fair,...,Non-White,Fair,0.0,Fair,5084.0,0.064657,Non-White,3309.0,0.143281,Non-White
"capital-loss = 0, native-country = United-States",Fair,0.0,Fair,Fair,0.0,Fair,Fair,0.0,Fair,Fair,...,Fair,Fair,0.0,Fair,7495.0,0.00016,White,4027.0,0.127129,Non-White
Workclass = Private,Fair,0.0,Fair,Fair,0.0,Fair,Fair,0.0,Fair,Fair,...,Fair,Fair,0.0,Fair,Fair,0.0,Fair,8820.0,0.043964,Non-White


In [22]:
# Per-metric summary: number of rank-1 subgroups and number of subgroups biased against each group.
analysis_df

Unnamed: 0,Rank = 1 Count,White bias against Count,Non-White bias against Count
"(Equal Cost of Effectiveness(Macro), 0.3)",1731,148,2802
"(Equal Cost of Effectiveness(Micro), 0.3)",1720,149,2794
Equal(Conditional Mean Recourse),2545,3892,9800
"(Equal Cost of Effectiveness(Macro), 0.7)",325,58,526
"(Equal Cost of Effectiveness(Micro), 0.7)",325,58,522
"(Equal Effectiveness within Budget, 1.150241502415024)",1,54,189
Equal Effectiveness,1,11884,2512
"(Equal Effectiveness within Budget, 21.0)",1,2490,11744
Fair Effectiveness-Cost Trade-Off,2,2272,14389
"(Equal Choice for Recourse, 0.7)",2,73,628


In [23]:
# Metric-by-metric rank comparison table; see get_analysis_dfs for the exact definition of each entry.
rank_analysis_df

Unnamed: 0,"(Equal Cost of Effectiveness(Micro), 0.3)",Equal Effectiveness,Fair Effectiveness-Cost Trade-Off,"(Equal Choice for Recourse, 0.7)","(Equal Cost of Effectiveness(Macro), 0.7)",Equal(Conditional Mean Recourse),"(Equal Choice for Recourse, 0.3)","(Equal Cost of Effectiveness(Macro), 0.3)","(Equal Effectiveness within Budget, 1.150241502415024)","(Equal Cost of Effectiveness(Micro), 0.7)","(Equal Effectiveness within Budget, 10.072980729807298)","(Equal Effectiveness within Budget, 21.0)"
"(Equal Cost of Effectiveness(Micro), 0.3)",1.0,1909.8,2438.0,7.0,21.0,3386.3,9.3,1.0,222.1,20.0,2911.7,2068.7
Equal Effectiveness,1.0,1.0,1.0,7.0,21.0,1.0,10.0,1.0,223.0,20.0,107.0,1.0
Fair Effectiveness-Cost Trade-Off,1.0,13.0,1.0,7.0,21.0,24.0,9.0,1.0,223.0,20.0,122.0,12.5
"(Equal Choice for Recourse, 0.7)",43.0,7602.0,140.0,1.0,2.0,306.0,10.0,43.0,223.0,2.0,26.5,3831.5
"(Equal Cost of Effectiveness(Macro), 0.7)",30.0,1734.5,1507.8,5.8,1.0,3177.6,9.9,30.1,217.0,1.0,3227.1,1976.2
Equal(Conditional Mean Recourse),42.1,7404.3,9040.6,7.0,21.0,1.0,11.0,42.1,223.0,20.0,7979.9,7380.8
"(Equal Choice for Recourse, 0.3)",18.0,11799.0,909.0,7.0,21.0,5897.0,1.0,18.0,223.0,20.0,303.0,11666.0
"(Equal Cost of Effectiveness(Macro), 0.3)",1.1,1907.8,2434.9,7.0,21.0,3379.9,9.3,1.0,222.1,20.0,2902.0,2065.5
"(Equal Effectiveness within Budget, 1.150241502415024)",43.0,815.0,1275.0,6.0,1.0,7540.0,11.0,43.0,1.0,1.0,478.0,813.0
"(Equal Cost of Effectiveness(Micro), 0.7)",30.0,1734.5,1507.8,5.8,1.0,3177.6,9.9,30.1,217.0,1.0,3227.1,1976.2


In [25]:
#top_rank = ranked[(ranked[('mincost-above-th', 0.7)]==1) | (ranked[('number-above-th', 0.7)]==1) |  (ranked['total-effectiveness']==1) | (ranked['total-effectiveness']==1) | (ranked[('eff-within-budget', 10.0)]==1) | (ranked[('eff-within-budget', 18.0)]==1) | (ranked[('eff-within-budget', 5.076880768807688)]==1) | (ranked[('cost-of-effectiveness', 0.7)]==1) | (ranked[('cost-of-effectiveness', 0.3)]==1) | (ranked[('KStest', 'value')]==1)]

In [26]:
# ranked_indexed = top_rank.set_index([top_rank.index, "subgroup"])
# ranked_fair = ranked_indexed == 'Fair'
# ranked_more_than_2000 = ranked_indexed.mask(ranked_indexed=='Fair') >= 1000
# new_rank = ranked_indexed[ranked_fair.apply(lambda row: row.sum() >=4 , axis=1) | ranked_more_than_2000.apply(lambda row : row.sum() >= 6,axis=1)]
# new_rank.head()

In [27]:
# sgs = [
#     ranked.iloc[9841].subgroup,
# ]

# rules_sgs_atomic = {ifc: rules_with_atomic_correctness[ifc] for ifc in sgs}
# rules_sgs_cumulative = {ifc: rules_with_cumulative_correctness[ifc] for ifc in sgs}

In [28]:
# df.iloc[4228].to_frame().T

In [29]:
# top_rules, subgroup_costs = facts.select_rules_subset(
#     rules_sgs_atomic,
#     metric="min-above-thr",
#     sort_strategy="generic-sorting-ignore-exists-subgroup-empty",
#     cor_threshold=0.7,
#     filter_sequence = [
#         # "remove-contained",
#         "remove-below-thr",        
#         #"remove-fair-rules", 
#         "keep-only-min-change"
#     ],
#     params=params
# )

# print_recourse_report(
#     top_rules,
#     subgroup_costs=subgroup_costs,
#     show_subgroup_costs=True
# )

In [30]:
# top_rules, subgroup_costs = facts.select_rules_subset_cumulative(
#     rules_sgs_cumulative,
#     metric="max-upto-cost",
#     sort_strategy="generic-sorting-ignore-exists-subgroup-empty",
#     top_count=top_count,
#     cost_threshold = 10,
#     filter_sequence = [
#         # "remove-contained",
#         # "remove-fair-rules",
#         "keep-cheap-rules-above-thr-cor",
#         # "keep-only-min-change",
        
#     ],
#     params=params
# )

# print_recourse_report_cumulative(
#     top_rules,
#     subgroup_costs=subgroup_costs,
#     show_subgroup_costs=True,
#     show_then_costs=True,
#     correctness_metric=True
# )