In [1]:
import numpy as np
import pandas as pd
from pprint import pprint

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# place all *.py files in the same folder
import lib2
from lib2 import Predicate, optimize
from models import customXGB
from apriori import preprocessDataset, runApriori, aprioriout2predicateList
from parameters import ParameterProxy

import matplotlib.pyplot as plt

In [2]:
DATAFILE = '../adult.data' # location of dataset
random_state = None # change to something for exactly reproducible results
sensitive_attribute = "Sex"
target_name = "label"
positive_label = ">50K"
negative_label = "<=50K"
model_train_fraction = 0.7

In [3]:
# specify feature names
feature_names = [
   "Age", "Workclass", "fnlwgt", "Education",
   "Education-Num", "Marital Status", "Occupation",
   "Relationship", "Race", "Sex", "Capital Gain",
   "Capital Loss", "Hours per week", "Country", "label"
]
# specify categorical columns
cate_columns = ['Workclass', 'Education', 'Marital Status', 'Occupation', 'Relationship', 'Race', 'Sex', 'Country']

In [4]:
# WARNING: after changing any of the values, restart the notebook

# define featureCost as a mapping holding, for each feature name, the respective cost of not keeping that feature constant
# Any feature not specified will have its cost set to 1
featureCosts = {"Sex": 100}

# define featureChange as a mapping from str to function, holding, for each feature name, the respective 
# function that calculates the cost of change from one value to another.
# Any feature change not specified will be set to 1 if there is change, and 0 otherwise.
def age_cost(age1: str, age2: str) -> int:
    return abs(int(age1) - int(age2))
featureChange = {"Age": age_cost}

# set the weights that manage the relative influence of coverage, correctness, feature cost and feature change
# in the objective function of the algorithm
l_cover = 1
l_correct = 3
l_cost = 1
l_change = 1

In [6]:
params = ParameterProxy(
    featureCosts=featureCosts,
    featureChanges=featureChange,
    lambda_correctness=l_correct,
    lambda_cover=l_cover,
    lambda_featureChange=l_change,
    lambda_featureCost=l_cost
)

# Data loading

Loads the dataset into variable `data`. Just run it.

In [7]:
data = pd.DataFrame(
  np.genfromtxt(DATAFILE, delimiter=', ', dtype=str),
  columns=feature_names
)

# Train test split

Split into train-test. The train set is used specifically for training the model, and nothing further.

In [8]:
X = data.drop(target_name, axis=1)
y = data[target_name]

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=model_train_fraction, random_state=random_state)

X_train = X_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)
y_train = y_train.reset_index(drop=True)
y_test = y_test.reset_index(drop=True)

# Definition and Training of a black-box model

We use a black box model based on gradient boosted decision trees.

In [9]:
model = customXGB(n_estimators=300, max_depth=5)
model.fit(X_train, y_train, cate_columns=cate_columns)

<models.customXGB at 0x23360f19d90>

0 denotes the negative and 1 the positive class.

In [10]:
model.predict(X_test.iloc[:100, :])

array([0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0,
       1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1])

# Testing the model

We check if the model is satisfactory.

In [11]:
preds = model.predict(X_test)
print(classification_report(y_test.map({negative_label: 0, positive_label: 1}), preds))

              precision    recall  f1-score   support

           0       0.89      0.94      0.92      7409
           1       0.77      0.65      0.71      2360

    accuracy                           0.87      9769
   macro avg       0.83      0.80      0.81      9769
weighted avg       0.86      0.87      0.87      9769



# Finding the affected

We check the predictions of the model on the test data, i.e. our population. The ones that get a 0 (negative outcome) are called the affected individuals.

In [12]:
X_aff_idxs = np.where(model.predict(X_test) == 0)[0]
print(f"Test data shape: {X_test.shape}")
X_aff = X_test.iloc[X_aff_idxs, :]
print(f"Affected shape: {X_aff.shape}")
# X_aff.reset_index(inplace=True, drop=True)

Test data shape: (9769, 14)
Affected shape: (7768, 14)


In [20]:
x = X_aff.copy()
x["Capital Gain"] = '15000'
print(X_aff["Capital Gain"].value_counts().to_string())
model.predict(x).sum()

0        7451
5013       22
3325       21
3137       16
2174       15
4064       15
594        12
4650       11
4386       10
2597        9
3908        8
2414        8
2885        8
3411        8
2829        7
2407        7
4416        7
2176        6
6849        6
1055        6
2635        5
4865        5
2354        5
5455        4
4101        4
1848        4
3674        4
2202        4
2580        4
1797        3
4508        3
3464        3
2228        3
3273        3
2964        3
3471        2
1151        2
1424        2
2907        2
1831        2
401         2
3887        2
2463        2
2961        2
3418        2
4934        2
3942        2
914         2
2105        2
6360        2
114         2
2329        2
1455        1
2653        1
1086        1
1506        1
3432        1
2538        1
20051       1
1471        1
2290        1
991         1
1409        1
6723        1
3781        1
7896        1
2062        1
3818        1
6497        1
1173        1
2346        1
7443  

7048

This means that the model classifies into the negative class about 7000 out of the 10000 total (test set) individuals.

## and the unaffected

In [44]:
X_unaff_idxs = np.where(model.predict(X_test) == 1)[0]
X_unaff = X_test.iloc[X_unaff_idxs, :]
print(f"Unaffected shape: {X_unaff.shape}")
# X_aff.reset_index(inplace=True, drop=True)

Unaffected shape: (2001, 14)


In [45]:
X_unaff["Capital Gain"].value_counts()

0        1501
15024      99
7688       84
7298       68
99999      49
5178       31
3103       25
4386       17
8614       16
14084      16
20051      14
10520      13
27828      11
9386        9
14344       9
13550       7
15831       5
4787        4
7430        3
25236       2
15020       2
10605       2
4687        2
6767        2
10566       2
4934        1
7896        1
6514        1
11678       1
5556        1
22040       1
6418        1
34095       1
Name: Capital Gain, dtype: int64

# Running apriori adaptation

We now generate the frequent itemsets of the datasets. These are used by the global counterfactual generating algorithm, in order to try and cover as many affected individuals as possible.

Here, we have implemented a simple `runApriori` wrapper function, which basically runs the fpgrowth algorithm for frequent itemset mining of the library mlxtend. It returns a dataframe with two columns, an $itemset$ column which contains the itemsets, in the form value tuples, and a $support$ column, which contains the relative frequency with which the itemset is contained in the dataset. Meaning, the fraction of individuals which have this specific combination of feature values.

Notice that we can give a minimum support as an argument to the function. This means that it returns only those itemsets whose support are above this value.

*Note*: You can ignore the "+feature_name" part. It has been appended to every value for implementation reasons, so that we know the "type" of each value, i.e. the feature it corresponds to. For example, whether a 0 is a value for "Capital Loss" or for "Capital Gain".

In [46]:
males_affected = X_aff[X_aff["Sex"] == "Male"].drop([sensitive_attribute], axis=1)
males_unaffected = X_unaff[X_unaff["Sex"] == "Male"].drop([sensitive_attribute], axis=1)
females_affected = X_aff[X_aff["Sex"] == "Female"].drop([sensitive_attribute], axis=1)
females_unaffected = X_unaff[X_unaff["Sex"] == "Female"].drop([sensitive_attribute], axis=1)

In [47]:
freq_ma = runApriori(preprocessDataset(males_affected), min_support=0.03)
freq_mu = runApriori(preprocessDataset(males_unaffected), min_support=0.03)
freq_fa = runApriori(preprocessDataset(females_affected), min_support=0.03)
freq_fu = runApriori(preprocessDataset(females_unaffected), min_support=0.03)

In [48]:
RL_ma = aprioriout2predicateList(freq_ma)
RL_mu = aprioriout2predicateList(freq_mu)
RL_fa = aprioriout2predicateList(freq_fa)
RL_fu = aprioriout2predicateList(freq_fu)

In [49]:
d = X_test.drop([sensitive_attribute], axis=1)
freq_itemsets = runApriori(preprocessDataset(d), min_support=0.03)
freq_itemsets.reset_index()
print(freq_itemsets.head())

     support                                 itemsets
0   0.954141                         (0+Capital Loss)
1   0.916368                         (0+Capital Gain)
2   0.895179                  (United-States+Country)
55  0.870509         (0+Capital Gain, 0+Capital Loss)
56  0.854028  (0+Capital Loss, United-States+Country)


In [50]:
print(freq_itemsets.head(100).to_string())

       support                                                                                                      itemsets
0     0.954141                                                                                              (0+Capital Loss)
1     0.916368                                                                                              (0+Capital Gain)
2     0.895179                                                                                       (United-States+Country)
55    0.870509                                                                              (0+Capital Gain, 0+Capital Loss)
56    0.854028                                                                       (0+Capital Loss, United-States+Country)
3     0.853823                                                                                                  (White+Race)
57    0.819941                                                                       (0+Capital Gain, United-States+Country)


In [51]:
print(freq_itemsets.tail(100))
# print(freq_itemsets.tail(100).to_string()) # uncomment for more details

       support                                           itemsets
3463  0.030505  (Some-college+Education, White+Race, Husband+R...
5481  0.030505  (9+Education-Num, Other-service+Occupation, Pr...
5039  0.030505  (White+Race, 0+Capital Gain, 9+Education-Num, ...
368   0.030402  (Own-child+Relationship, Never-married+Marital...
5296  0.030402  (0+Capital Loss, White+Race, 0+Capital Gain, A...
...        ...                                                ...
5543  0.030095  (Married-civ-spouse+Marital Status, Husband+Re...
3928  0.030095  (0+Capital Loss, Never-married+Marital Status,...
3927  0.030095  (0+Capital Loss, Never-married+Marital Status,...
5706  0.030095  (Married-civ-spouse+Marital Status, Masters+Ed...
5733  0.030095  (14+Education-Num, Married-civ-spouse+Marital ...

[100 rows x 2 columns]


Next, we use the function `aprioriout2predicateList`, which "casts" the output of the frequent itemset mining algorithm to our internal representation of a "triple" (as in the ares paper). This representation is the class `Predicate`.

RL is the initial set of candidate predicates (taken as the output of the itemset algo), from which we will then pick pairs to represent our rules (as in ares paper).

In [52]:
RL = aprioriout2predicateList(freq_itemsets)
pprint(RL[:10])
print(len(RL))

[Predicate(features=['Capital Loss'], values=['0']),
 Predicate(features=['Capital Gain'], values=['0']),
 Predicate(features=['Country'], values=['United-States']),
 Predicate(features=['Capital Gain', 'Capital Loss'], values=['0', '0']),
 Predicate(features=['Capital Loss', 'Country'], values=['0', 'United-States']),
 Predicate(features=['Race'], values=['White']),
 Predicate(features=['Capital Gain', 'Country'], values=['0', 'United-States']),
 Predicate(features=['Capital Loss', 'Race'], values=['0', 'White']),
 Predicate(features=['Race', 'Country'], values=['White', 'United-States']),
 Predicate(features=['Capital Gain', 'Race'], values=['0', 'White'])]
5734


# Running the optimization procedure

First, just turn the user-defined SD (subgroup descriptors) to predicates.

In [53]:
SD = list(map(Predicate.from_dict, [
    {sensitive_attribute: val} for val in data[sensitive_attribute].unique()
]))
print(SD)

[Predicate(features=['Sex'], values=['Male']), Predicate(features=['Sex'], values=['Female'])]


In [54]:
rules = [(Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education', 'Capital Gain', 'Relationship'], values=['Bachelors', '0', 'Husband']),
   Predicate(features=['Education', 'Capital Gain', 'Relationship'], values=['Some-college', '0', 'Husband'])),
  (Predicate(features=['Sex'], values=['Female']),
   Predicate(features=['Workclass', 'Marital Status', 'Relationship'], values=['Private', 'Divorced', 'Not-in-family']),
   Predicate(features=['Workclass', 'Marital Status', 'Relationship'], values=['Private', 'Married-civ-spouse', 'Wife'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education-Num'], values=['9']),
   Predicate(features=['Education-Num'], values=['14'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education-Num'], values=['10']),
   Predicate(features=['Education-Num'], values=['14'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education', 'Marital Status'], values=['Bachelors', 'Never-married']),
   Predicate(features=['Education', 'Marital Status'], values=['Some-college', 'Married-civ-spouse'])),
  (Predicate(features=['Sex'], values=['Female']),
   Predicate(features=['Marital Status', 'Education-Num'], values=['Married-civ-spouse', '10']),
   Predicate(features=['Marital Status', 'Education-Num'], values=['Married-civ-spouse', '13'])),
  (Predicate(features=['Sex'], values=['Female']),
   Predicate(features=['Marital Status'], values=['Never-married']),
   Predicate(features=['Marital Status'], values=['Married-civ-spouse'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education-Num'], values=['11']),
   Predicate(features=['Education-Num'], values=['14'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education-Num'], values=['7']),
   Predicate(features=['Education-Num'], values=['14'])),
  (Predicate(features=['Sex'], values=['Female']),
   Predicate(features=['Marital Status', 'Education'], values=['Married-civ-spouse', 'Bachelors']),
   Predicate(features=['Marital Status', 'Education'], values=['Married-civ-spouse', 'HS-grad'])),
  (Predicate(features=['Sex'], values=['Female']),
   Predicate(features=['Marital Status', 'Relationship'], values=['Divorced', 'Unmarried']),
   Predicate(features=['Marital Status', 'Relationship'], values=['Married-civ-spouse', 'Wife'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education', 'Capital Loss', 'Race'], values=['Masters', '0', 'White']),
   Predicate(features=['Education', 'Capital Loss', 'Race'], values=['Some-college', '0', 'White'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education', 'Education-Num'], values=['Assoc-acdm', '12']),
   Predicate(features=['Education', 'Education-Num'], values=['Masters', '14'])),
  (Predicate(features=['Sex'], values=['Female']),
   Predicate(features=['Capital Loss', 'Education-Num', 'Capital Gain', 'Marital Status'], values=['0', '9', '0', 'Married-civ-spouse']),
   Predicate(features=['Capital Loss', 'Education-Num', 'Capital Gain', 'Marital Status'], values=['0', '13', '0', 'Married-civ-spouse']))]

In [55]:
from recourse_sets import TwoLevelRecourseSet

In [56]:
rules = TwoLevelRecourseSet.from_triples(rules)

In [57]:
print(rules)

If Sex = Female:
	If Workclass = Private, Marital Status = Divorced, Relationship = Not-in-family,
	Then Workclass = Private, Marital Status = Married-civ-spouse, Relationship = Wife.
	If Marital Status = Married-civ-spouse, Education-Num = 10,
	Then Marital Status = Married-civ-spouse, Education-Num = 13.
	If Marital Status = Never-married,
	Then Marital Status = Married-civ-spouse.
	If Marital Status = Married-civ-spouse, Education = Bachelors,
	Then Marital Status = Married-civ-spouse, Education = HS-grad.
	If Marital Status = Divorced, Relationship = Unmarried,
	Then Marital Status = Married-civ-spouse, Relationship = Wife.
	If Capital Loss = 0, Education-Num = 9, Capital Gain = 0, Marital Status = Married-civ-spouse,
	Then Capital Loss = 0, Education-Num = 13, Capital Gain = 0, Marital Status = Married-civ-spouse.
If Sex = Male:
	If Education = Bachelors, Capital Gain = 0, Relationship = Husband,
	Then Education = Some-college, Capital Gain = 0, Relationship = Husband.
	If Educati

In [58]:
model.predict(X_test)

array([0, 0, 0, ..., 0, 0, 0])

In [59]:
from metrics import incorrectRecoursesSubmodular, incorrectRecourses, cover, featureCost, featureChange

In [60]:
print(X_aff.shape)

(7768, 14)


In [61]:
%%time
print(incorrectRecoursesSubmodular(rules, X_aff, model))

4814
CPU times: total: 14.1 s
Wall time: 2.96 s


In [62]:
%%time
print(incorrectRecourses(rules, X_aff, model))

4814
CPU times: total: 3.83 s
Wall time: 5.56 s


In [63]:
%%time
print(cover(rules, X_aff))

6213
CPU times: total: 859 ms
Wall time: 783 ms


In [64]:
print(lib2.recourse_report(rules, X_aff, model))

If Sex = Female:
	If Workclass = Private, Marital Status = Divorced, Relationship = Not-in-family,
	Then Workclass = Private, Marital Status = Married-civ-spouse, Relationship = Wife.
		Coverage: 2.896% over all affected.
		Incorrect recourses additive: 63.556% over all individuals covered by this rule.
		Incorrect recourses at-least-one: 63.556% over all individuals covered by this rule.
	If Marital Status = Married-civ-spouse, Education-Num = 10,
	Then Marital Status = Married-civ-spouse, Education-Num = 13.
		Coverage: 0.554% over all affected.
		Incorrect recourses additive: 88.372% over all individuals covered by this rule.
		Incorrect recourses at-least-one: 88.372% over all individuals covered by this rule.
	If Marital Status = Never-married,
	Then Marital Status = Married-civ-spouse.
		Coverage: 18.409% over all affected.
		Incorrect recourses additive: 92.448% over all individuals covered by this rule.
		Incorrect recourses at-least-one: 92.448% over all individuals covered by

Now, we run the submodular optimization.

In [28]:
%%time

ifthen_triples = np.random.choice(RL, 400, replace=False)
affected_sample = X_aff.iloc[np.random.choice(X_aff.shape[0], size=400, replace=False), :]
final_rules = optimize(SD, ifthen_triples, affected_sample, model)

Total triples = 52
Calculated incorrect recourse for each triple
Calculated feature costs for each triple
Calculated feature changes for each feature
Calculated covers for each triple
CPU times: total: 8.2 s
Wall time: 987 ms


In [29]:
final_rules

([(Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Occupation', 'Marital Status', 'Race'], values=['Craft-repair', 'Married-civ-spouse', 'White']),
   Predicate(features=['Occupation', 'Marital Status', 'Race'], values=['Exec-managerial', 'Married-civ-spouse', 'White'])),
  (Predicate(features=['Sex'], values=['Female']),
   Predicate(features=['Marital Status', 'Country', 'Hours per week', 'Capital Gain', 'Capital Loss'], values=['Never-married', 'United-States', '40', '0', '0']),
   Predicate(features=['Marital Status', 'Country', 'Hours per week', 'Capital Gain', 'Capital Loss'], values=['Married-civ-spouse', 'United-States', '50', '0', '0']))],
 -43,
 59,
 -3,
 -3)

In [33]:
print(two_level_recourses)

If Sex = Female:
	If Marital Status = Never-married, Country = United-States, Hours per week = 40, Capital Gain = 0, Capital Loss = 0,
	Then Marital Status = Married-civ-spouse, Country = United-States, Hours per week = 50, Capital Gain = 0, Capital Loss = 0
If Sex = Male:
	If Occupation = Craft-repair, Marital Status = Married-civ-spouse, Race = White,
	Then Occupation = Exec-managerial, Marital Status = Married-civ-spouse, Race = White



In [35]:
two_level_recourses = TwoLevelRecourseSet.from_triples(final_rules[0])

print(incorrectRecourses(two_level_recourses, X_aff, model))
print(incorrectRecoursesSubmodular(two_level_recourses, X_aff, model))
print(cover(two_level_recourses, X_aff))
print(featureCost(two_level_recourses))
print(featureChange(two_level_recourses))

832
832
1073
3
3


In [29]:
pprint(TwoLevelRecourseSet.from_triples(final_rules[0]))

TwoLevelRecourseSet(feature='Sex', values=['Female', 'Male'], rules={'Female': RecourseSet(hypotheses=[Predicate(features=['Marital Status', 'Workclass'], values=['Divorced', 'Private'])], suggestions=[Predicate(features=['Marital Status', 'Workclass'], values=['Married-civ-spouse', 'Private'])]), 'Male': RecourseSet(hypotheses=[Predicate(features=['Marital Status', 'Workclass'], values=['Divorced', 'Private']), Predicate(features=['Capital Gain', 'Marital Status', 'Country', 'Education'], values=['0', 'Never-married', 'United-States', 'Bachelors'])], suggestions=[Predicate(features=['Marital Status', 'Workclass'], values=['Married-civ-spouse', 'Private']), Predicate(features=['Capital Gain', 'Marital Status', 'Country', 'Education'], values=['0', 'Married-civ-spouse', 'United-States', 'Bachelors'])])})


In [65]:
from optimization import _optimize

In [66]:
len(RL)
print(len(RL_ma))
print(len(RL_mu))
print(len(RL_fa))
print(len(RL_fu))

6535
8680
6492
7890


In [69]:
%%time

final_rules_separate = _optimize(
    SD,
    ifs={"Male": RL_ma[:500], "Female": RL_fa[:500]},
    thens={"Male": RL_mu[:500], "Female": RL_fu[:500]},
    X_aff=X_aff,
    model=model,
    params=params
)

Total triples = 294
Calculated incorrect recourse for each triple
Calculated feature costs for each triple
Calculated feature changes for each feature
Calculated covers for each triple
CPU times: total: 4min 52s
Wall time: 1min 17s


In [70]:
pprint(final_rules_separate)

([], 0, 0, 0, 0)


In [38]:
%%time

rules_separate = TwoLevelRecourseSet.from_triples(final_rules_separate[0])
print(f"X_aff shape: {X_aff.shape}")
print(f"Incorrect recourses as in AReS: {incorrectRecourses(rules_separate, X_aff, model)}")
print(f"Incorrect recourses at-least-one: {incorrectRecoursesSubmodular(rules_separate, X_aff, model)}")
print(f"Individuals covered: {cover(rules_separate, X_aff)}")

IndexError: list index out of range

In [39]:
print(lib2.recourse_report(rules_separate, X_aff, model))

NameError: name 'rules_separate' is not defined

In [59]:
pprint(final_rules_separate)

([(Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Capital Gain'], values=['0']),
   Predicate(features=['Capital Gain'], values=['15024'])),
  (Predicate(features=['Sex'], values=['Female']),
   Predicate(features=['Capital Gain'], values=['0']),
   Predicate(features=['Capital Gain'], values=['15024']))],
 -42,
 386,
 -2,
 -2)


In [26]:
%%time

final_rules = optimize(SD, RL, X_aff, model)

Total triples = 12836
X_aff shape before: (7754, 14)
Calculated incorrect recourse for each triple
Calculated feature costs for each triple
Calculated feature changes for each feature
X_aff shape after: (7754, 14)
Calculated covers for each triple
141
CPU times: total: 3h 57min 21s
Wall time: 46min 35s


In [27]:
pprint(final_rules)

([(Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education', 'Capital Gain', 'Relationship'], values=['Bachelors', '0', 'Husband']),
   Predicate(features=['Education', 'Capital Gain', 'Relationship'], values=['Some-college', '0', 'Husband'])),
  (Predicate(features=['Sex'], values=['Female']),
   Predicate(features=['Workclass', 'Marital Status', 'Relationship'], values=['Private', 'Divorced', 'Not-in-family']),
   Predicate(features=['Workclass', 'Marital Status', 'Relationship'], values=['Private', 'Married-civ-spouse', 'Wife'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education-Num'], values=['9']),
   Predicate(features=['Education-Num'], values=['14'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education-Num'], values=['10']),
   Predicate(features=['Education-Num'], values=['14'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education', 'Marital Status'], value

In [20]:
print(X_aff.shape)

(7754, 14)


In [19]:
%%time

final_rules = optimize(SD, RL[:400], X_aff[:400], model)

Total triples = 364
X_aff shape before: (400, 14)
Calculated incorrect recourse for each triple
Calculated feature costs for each triple
Calculated feature changes for each feature
X_aff shape after: (400, 14)
Calculated covers for each triple
24
CPU times: total: 50.1 s
Wall time: 6.28 s


In [20]:
pprint(final_rules)

([(Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education-Num'], values=['10']),
   Predicate(features=['Education-Num'], values=['13'])),
  (Predicate(features=['Sex'], values=['Female']),
   Predicate(features=['Marital Status'], values=['Never-married']),
   Predicate(features=['Marital Status'], values=['Married-civ-spouse'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education', 'Education-Num'], values=['HS-grad', '9']),
   Predicate(features=['Education', 'Education-Num'], values=['Bachelors', '13']))],
 -176,
 222,
 -4,
 -4)


In [19]:
pprint(final_rules)

([(Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Marital Status'], values=['Never-married']),
   Predicate(features=['Marital Status'], values=['Married-civ-spouse'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Relationship'], values=['Husband']),
   Predicate(features=['Relationship'], values=['Not-in-family'])),
  (Predicate(features=['Sex'], values=['Female']),
   Predicate(features=['Marital Status'], values=['Never-married']),
   Predicate(features=['Marital Status'], values=['Married-civ-spouse']))],
 -5121,
 5464,
 -3,
 -3)


In [30]:
pprint(final_rules)

([(Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education-Num'], values=['10']),
   Predicate(features=['Education-Num'], values=['13'])),
  (Predicate(features=['Sex'], values=['Female']),
   Predicate(features=['Marital Status'], values=['Never-married']),
   Predicate(features=['Marital Status'], values=['Married-civ-spouse'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education-Num'], values=['9']),
   Predicate(features=['Education-Num'], values=['13'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education'], values=['Bachelors']),
   Predicate(features=['Education'], values=['Some-college']))],
 -4085,
 4845,
 -4,
 -4)


In [30]:
pprint(final_rules)

([(Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education-Num'], values=['10']),
   Predicate(features=['Education-Num'], values=['13'])),
  (Predicate(features=['Sex'], values=['Female']),
   Predicate(features=['Marital Status'], values=['Never-married']),
   Predicate(features=['Marital Status'], values=['Married-civ-spouse'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education-Num'], values=['9']),
   Predicate(features=['Education-Num'], values=['13'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education'], values=['Bachelors']),
   Predicate(features=['Education'], values=['Some-college']))],
 -4085,
 4845,
 -4,
 -4)


In [19]:
pprint(final_rules)

([(Predicate(features=['Sex'], values=['Female']),
   Predicate(features=['Marital Status'], values=['Never-married']),
   Predicate(features=['Marital Status'], values=['Married-civ-spouse']))],
 -1248,
 1362,
 -1,
 -1)


In [38]:
pprint(final_rules)

([(Predicate(features=['Sex'], values=['Female']),
   Predicate(features=['Marital Status'], values=['Never-married']),
   Predicate(features=['Marital Status'], values=['Married-civ-spouse']))],
 -67,
 72,
 -1,
 -1)


In [19]:
pprint(final_rules)

([(Predicate(features=['Sex'], values=['Female']),
   Predicate(features=['Marital Status'], values=['Never-married']),
   Predicate(features=['Marital Status'], values=['Married-civ-spouse']))],
 -70,
 75,
 -1,
 -1)


In [29]:
pprint(final_rules)

([(Predicate(features=['Sex'], values=['Female']),
   Predicate(features=['Education-Num'], values=['9']),
   Predicate(features=['Education-Num'], values=['13'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education-Num'], values=['9']),
   Predicate(features=['Education-Num'], values=['13'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education-Num'], values=['10']),
   Predicate(features=['Education-Num'], values=['13']))],
 -159,
 206,
 -3,
 -3)
