In [1]:
import numpy as np
import pandas as pd
from pprint import pprint

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# place lib2.py, models.py and apriori.py in the same folder
import lib2
from lib2 import Predicate, optimize
from models import customXGB
from apriori import preprocessDataset, runApriori, aprioriout2predicateList

In [2]:
# --- Experiment configuration ---

# Location of the dataset file.
DATAFILE = "../adult.data"

# Seed handed to train_test_split. With None the split differs on every run;
# change to an integer for exactly reproducible results.
random_state = None

# Fraction of the rows used to train the black-box model.
model_train_fraction = 0.7

# Column whose distinct values define the subgroups that are compared.
sensitive_attribute = "Sex"

# Target column and the two labels it takes.
target_name = "label"
positive_label = ">50K"
negative_label = "<=50K"

In [3]:
# Column names assigned to the loaded table, in order; the last one is the target column.
feature_names = [
    "Age",
    "Workclass",
    "fnlwgt",
    "Education",
    "Education-Num",
    "Marital Status",
    "Occupation",
    "Relationship",
    "Race",
    "Sex",
    "Capital Gain",
    "Capital Loss",
    "Hours per week",
    "Country",
    "label",
]

# Columns that are passed to the model as categorical features.
cate_columns = [
    "Workclass",
    "Education",
    "Marital Status",
    "Occupation",
    "Relationship",
    "Race",
    "Sex",
    "Country",
]

In [4]:
# featureCosts: for each feature name, the cost of not keeping that feature constant.
# Any feature that is not listed gets a cost of 1.
featureCosts = {"Sex": 100}


def age_cost(age1: str, age2: str) -> int:
    """Return the absolute difference between two ages given as numeric strings."""
    gap = int(age1) - int(age2)
    return -gap if gap < 0 else gap


# featureChange: for each feature name, the function that calculates the cost of
# changing from one value to another. Any feature that is not listed costs 1 when
# its value changes and 0 otherwise.
featureChange = {"Age": age_cost}

# Weights that manage the relative influence of coverage, correctness, feature cost
# and feature change in the objective function of the algorithm.
l_cover = l_correct = l_cost = l_change = 1

In [5]:
# Hand the user-defined costs, change functions and objective weights to lib2.
# NOTE(review): presumably stored as module-level state that `optimize` reads later —
# confirm in lib2.py. If so, this cell must be re-run whenever the values above change.
lib2.setFeatureCost(featureCosts)
lib2.setFeatureChange(featureChange)
lib2.set_lambdas(l_cover, l_correct, l_cost, l_change)

# Data loading

Loads the dataset into variable `data`. Just run it.

In [6]:
# Read every field as a string (fields in the file are separated by ", ") and
# label the resulting columns with feature_names.
data = pd.DataFrame(
    data=np.genfromtxt(fname=DATAFILE, dtype=str, delimiter=', '),
    columns=feature_names,
)

# Train test split

Split into train-test. The train set is used specifically for training the model, and nothing further.

In [7]:
# Separate the target column from the feature columns.
y = data[target_name]
X = data.drop(columns=target_name)

# Random split with model_train_fraction of the rows going to training.
# Every resulting piece gets a fresh 0..n-1 index.
X_train, X_test, y_train, y_test = (
    part.reset_index(drop=True)
    for part in train_test_split(
        X, y, train_size=model_train_fraction, random_state=random_state
    )
)

# Definition and Training of a black-box model

We use a black box model based on gradient boosted decision trees.

In [None]:
# Black-box classifier from models.py, configured with n_estimators=300 and max_depth=5.
model = customXGB(n_estimators=300, max_depth=5)
# Train on the training split only; cate_columns names the categorical features.
model.fit(X_train, y_train, cate_columns=cate_columns)

<models.customXGB at 0x2ba14959d30>

0 denotes the negative and 1 the positive class.

In [None]:
# Quick look at the model output: predictions for the first 100 rows of the test split.
model.predict(X_test.iloc[:100, :])

array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0,
       1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0])

# Testing the model

We check if the model is satisfactory.

In [None]:
# Predict the whole test split and score it against the ground truth,
# with the string labels encoded as 0 (negative) / 1 (positive).
preds = model.predict(X_test)
print(
    classification_report(
        y_true=y_test.map({negative_label: 0, positive_label: 1}),
        y_pred=preds,
    )
)

              precision    recall  f1-score   support

           0       0.89      0.94      0.92      7450
           1       0.77      0.64      0.70      2319

    accuracy                           0.87      9769
   macro avg       0.83      0.79      0.81      9769
weighted avg       0.87      0.87      0.87      9769



# Finding the affected

We check the predictions of the model on the test data, i.e. our population. The ones that get a 0 (negative outcome) are called the affected individuals.

In [None]:
# Positions (within X_test) of the rows predicted as class 0, i.e. the affected individuals.
X_aff_idxs = np.flatnonzero(model.predict(X_test) == 0)
# Positional selection: X_aff keeps the row labels of X_test (its index is not reset).
X_aff = X_test.iloc[X_aff_idxs]
# Shapes of the full dataset, of the position array and of the affected subset, one per line.
print(data.shape, X_aff_idxs.shape, X_aff.shape, sep="\n")

(32561, 15)
(7832,)
(7832, 14)


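This means that the model classifies 7832 of the 9769 individuals in the test set into the negative class (the full dataset has 32561 rows). Because `random_state` is `None`, the exact count changes between runs; the optimization output further below comes from a run with 7754 affected individuals.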

# Running apriori adaptation

We now generate the frequent itemsets of the datasets. These are used by the global counterfactual generating algorithm, in order to try and cover as many affected individuals as possible.

Here, we have implemented a simple `runApriori` wrapper function, which runs the `fpgrowth` frequent itemset mining algorithm of the `mlxtend` library. It returns a dataframe with two columns: an `itemsets` column, which contains the itemsets in the form of value tuples, and a `support` column, which contains the relative frequency with which the itemset occurs in the dataset, i.e. the fraction of individuals that have this specific combination of feature values.

Notice that we can give a minimum support as an argument to the function. This means that it returns only those itemsets whose support is above this value.

*Note*: You can ignore the "+feature_name" part. It has been appended to every value for implementation reasons, so that we know the "type" of each value, i.e. the feature it corresponds to. For example, whether a 0 is a value for "Capital Loss" or for "Capital Gain".

In [23]:
# Mine frequent itemsets on the test population with the sensitive attribute removed.
d = X_test.drop([sensitive_attribute], axis=1)
# Only itemsets with support above min_support are returned.
freq_itemsets = runApriori(preprocessDataset(d), min_support=0.03)
# The frame keeps the row labels it was returned with. DataFrame.reset_index returns
# a new frame instead of modifying in place, so a bare `freq_itemsets.reset_index()`
# statement has no effect; none is issued here.
print(freq_itemsets.head())

     support                                 itemsets
0   0.953731                         (0+Capital Loss)
1   0.917596                         (0+Capital Gain)
2   0.902037                  (United-States+Country)
54  0.871328         (0+Capital Gain, 0+Capital Loss)
55  0.859965  (United-States+Country, 0+Capital Loss)


In [25]:
# First 100 itemsets; to_string() prints the itemsets column in full.
print(freq_itemsets.head(100).to_string())

       support                                                                                                      itemsets
0     0.953731                                                                                              (0+Capital Loss)
1     0.917596                                                                                              (0+Capital Gain)
2     0.902037                                                                                       (United-States+Country)
54    0.871328                                                                              (0+Capital Gain, 0+Capital Loss)
55    0.859965                                                                       (United-States+Country, 0+Capital Loss)
11    0.854335                                                                                                  (White+Race)
56    0.827106                                                                       (United-States+Country, 0+Capital Gain)


In [15]:
# Last 100 itemsets; their support is just above the min_support threshold.
# Printed without to_string(), so long itemsets are truncated in the output.
print(freq_itemsets.tail(100))
# print(freq_itemsets.tail(100).to_string()) # uncomment for more details

       support                                           itemsets
4716  0.030402  (Married-civ-spouse+Marital Status, Sales+Occu...
2028  0.030402  (HS-grad+Education, Not-in-family+Relationship...
2039  0.030402  (Not-in-family+Relationship, Never-married+Mar...
2041  0.030402  (HS-grad+Education, Not-in-family+Relationship...
2164  0.030402  (Adm-clerical+Occupation, HS-grad+Education, U...
...        ...                                                ...
5809  0.030095  (Exec-managerial+Occupation, 40+Hours per week...
5762  0.030095  (Exec-managerial+Occupation, United-States+Cou...
4726  0.030095  (United-States+Country, 0+Capital Gain, Sales+...
4603  0.030095  (Husband+Relationship, Craft-repair+Occupation...
2676  0.030095  (10+Education-Num, Own-child+Relationship, 0+C...

[100 rows x 2 columns]


Next, we use the function `aprioriout2predicateList`, which "casts" the output of the frequent itemset mining algorithm to our internal representation of a "triple" (as in the ares paper). This representation is the class `Predicate`.

RL is the initial set of candidate predicates (taken as the output of the itemset algo), from which we will then pick pairs to represent our rules (as in ares paper).

In [16]:
# Convert every mined itemset into a Predicate (a list of features with their values).
RL = aprioriout2predicateList(freq_itemsets)
# Show the first ten candidates and the total number of candidates.
pprint(RL[:10])
print(len(RL))

[Predicate(features=['Capital Loss'], values=['0']),
 Predicate(features=['Capital Gain'], values=['0']),
 Predicate(features=['Country'], values=['United-States']),
 Predicate(features=['Capital Gain', 'Capital Loss'], values=['0', '0']),
 Predicate(features=['Country', 'Capital Loss'], values=['United-States', '0']),
 Predicate(features=['Race'], values=['White']),
 Predicate(features=['Country', 'Capital Gain'], values=['United-States', '0']),
 Predicate(features=['Race', 'Capital Loss'], values=['White', '0']),
 Predicate(features=['Country', 'Race'], values=['United-States', 'White']),
 Predicate(features=['Country', 'Capital Gain', 'Capital Loss'], values=['United-States', '0', '0'])]
5878


# Running the optimization procedure

First, just turn the user-defined SD (subgroup descriptors) to predicates.

In [17]:
# One single-feature predicate per distinct value of the sensitive attribute
# (values are taken from the full dataset).
SD = [
    Predicate.from_dict({sensitive_attribute: val})
    for val in data[sensitive_attribute].unique()
]
print(SD)

[Predicate(features=['Sex'], values=['Male']), Predicate(features=['Sex'], values=['Female'])]


Now, we run the submodular optimization.

In [26]:
%%time

# Full run: all candidate predicates in RL against every affected individual.
# Expensive: the saved output below reports roughly 46 minutes of wall time.
final_rules = optimize(SD, RL, X_aff, model)

Total triples = 12836
X_aff shape before: (7754, 14)
Calculated incorrect recourse for each triple
Calculated feature costs for each triple
Calculated feature changes for each feature
X_aff shape after: (7754, 14)
Calculated covers for each triple
141
CPU times: total: 3h 57min 21s
Wall time: 46min 35s


In [27]:
# Pretty-print the value returned by the full `optimize` run.
pprint(final_rules)

([(Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education', 'Capital Gain', 'Relationship'], values=['Bachelors', '0', 'Husband']),
   Predicate(features=['Education', 'Capital Gain', 'Relationship'], values=['Some-college', '0', 'Husband'])),
  (Predicate(features=['Sex'], values=['Female']),
   Predicate(features=['Workclass', 'Marital Status', 'Relationship'], values=['Private', 'Divorced', 'Not-in-family']),
   Predicate(features=['Workclass', 'Marital Status', 'Relationship'], values=['Private', 'Married-civ-spouse', 'Wife'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education-Num'], values=['9']),
   Predicate(features=['Education-Num'], values=['14'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education-Num'], values=['10']),
   Predicate(features=['Education-Num'], values=['14'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education', 'Marital Status'], value

In [20]:
# (number of affected individuals, number of feature columns)
print(X_aff.shape)

(7754, 14)


In [19]:
%%time

# Reduced run for quick iteration: only the first 400 candidate predicates and the
# first 400 rows of X_aff. This rebinds `final_rules`, replacing any earlier result.
final_rules = optimize(SD, RL[:400], X_aff[:400], model)

Total triples = 364
X_aff shape before: (400, 14)
Calculated incorrect recourse for each triple
Calculated feature costs for each triple
Calculated feature changes for each feature
X_aff shape after: (400, 14)
Calculated covers for each triple
24
CPU times: total: 50.1 s
Wall time: 6.28 s


In [20]:
# Pretty-print the current `final_rules`. The saved output is a tuple: a list of
# Predicate triples followed by four integers (presumably the objective terms —
# confirm in lib2.optimize).
pprint(final_rules)

([(Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education-Num'], values=['10']),
   Predicate(features=['Education-Num'], values=['13'])),
  (Predicate(features=['Sex'], values=['Female']),
   Predicate(features=['Marital Status'], values=['Never-married']),
   Predicate(features=['Marital Status'], values=['Married-civ-spouse'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education', 'Education-Num'], values=['HS-grad', '9']),
   Predicate(features=['Education', 'Education-Num'], values=['Bachelors', '13']))],
 -176,
 222,
 -4,
 -4)


In [19]:
# NOTE(review): repeated `pprint(final_rules)` cell. Its execution count is out of
# sequence and the `optimize` call that produced the saved output is not shown,
# so the output cannot be reproduced with Restart & Run All.
pprint(final_rules)

([(Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Marital Status'], values=['Never-married']),
   Predicate(features=['Marital Status'], values=['Married-civ-spouse'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Relationship'], values=['Husband']),
   Predicate(features=['Relationship'], values=['Not-in-family'])),
  (Predicate(features=['Sex'], values=['Female']),
   Predicate(features=['Marital Status'], values=['Never-married']),
   Predicate(features=['Marital Status'], values=['Married-civ-spouse']))],
 -5121,
 5464,
 -3,
 -3)


In [30]:
# NOTE(review): repeated `pprint(final_rules)` cell with saved output from a run
# whose `optimize` call is not shown in this notebook.
pprint(final_rules)

([(Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education-Num'], values=['10']),
   Predicate(features=['Education-Num'], values=['13'])),
  (Predicate(features=['Sex'], values=['Female']),
   Predicate(features=['Marital Status'], values=['Never-married']),
   Predicate(features=['Marital Status'], values=['Married-civ-spouse'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education-Num'], values=['9']),
   Predicate(features=['Education-Num'], values=['13'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education'], values=['Bachelors']),
   Predicate(features=['Education'], values=['Some-college']))],
 -4085,
 4845,
 -4,
 -4)


In [30]:
# NOTE(review): exact duplicate of the preceding cell (same execution count and
# same saved output); a candidate for removal.
pprint(final_rules)

([(Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education-Num'], values=['10']),
   Predicate(features=['Education-Num'], values=['13'])),
  (Predicate(features=['Sex'], values=['Female']),
   Predicate(features=['Marital Status'], values=['Never-married']),
   Predicate(features=['Marital Status'], values=['Married-civ-spouse'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education-Num'], values=['9']),
   Predicate(features=['Education-Num'], values=['13'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education'], values=['Bachelors']),
   Predicate(features=['Education'], values=['Some-college']))],
 -4085,
 4845,
 -4,
 -4)


In [19]:
# NOTE(review): repeated `pprint(final_rules)` cell; the saved output (a single
# rule) comes from a run whose `optimize` call and settings are not shown.
pprint(final_rules)

([(Predicate(features=['Sex'], values=['Female']),
   Predicate(features=['Marital Status'], values=['Never-married']),
   Predicate(features=['Marital Status'], values=['Married-civ-spouse']))],
 -1248,
 1362,
 -1,
 -1)


In [38]:
# NOTE(review): repeated `pprint(final_rules)` cell; execution count is out of
# sequence and the producing `optimize` call is not shown.
pprint(final_rules)

([(Predicate(features=['Sex'], values=['Female']),
   Predicate(features=['Marital Status'], values=['Never-married']),
   Predicate(features=['Marital Status'], values=['Married-civ-spouse']))],
 -67,
 72,
 -1,
 -1)


In [19]:
# NOTE(review): repeated `pprint(final_rules)` cell; the saved output stems from
# an unspecified earlier run and is not reproducible from the cells shown.
pprint(final_rules)

([(Predicate(features=['Sex'], values=['Female']),
   Predicate(features=['Marital Status'], values=['Never-married']),
   Predicate(features=['Marital Status'], values=['Married-civ-spouse']))],
 -70,
 75,
 -1,
 -1)


In [29]:
# NOTE(review): repeated `pprint(final_rules)` cell; record the `optimize` arguments
# and weights that led to this output, or remove the cell.
pprint(final_rules)

([(Predicate(features=['Sex'], values=['Female']),
   Predicate(features=['Education-Num'], values=['9']),
   Predicate(features=['Education-Num'], values=['13'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education-Num'], values=['9']),
   Predicate(features=['Education-Num'], values=['13'])),
  (Predicate(features=['Sex'], values=['Male']),
   Predicate(features=['Education-Num'], values=['10']),
   Predicate(features=['Education-Num'], values=['13']))],
 -159,
 206,
 -3,
 -3)
