Test for using actionable-recourse, provided on https://github.com/ustunb/actionable-recourse

In order to compare recourse for several similar classifiers, we use cross validation to fit several logistic regression models (Is this the right way?). In the next step, we want to check whether the flipsets generated for one of them apply also for the other classifiers.

In [24]:
import copy
from tqdm.notebook import tqdm
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_validate
from sklearn.model_selection import StratifiedKFold as CVGenerator
from sklearn.model_selection import GridSearchCV
import recourse as rs
from recourse.builder import ActionSet #FIX
from recourse.flipset import Flipset #FIX
from recourse.auditor import RecourseAuditor #FIX

import data

In [2]:
url = 'https://raw.githubusercontent.com/ustunb/actionable-recourse/master/examples/paper/data/credit_processed.csv'
df = pd.read_csv(url, skipinitialspace=True)
y, X = df.iloc[:, 0], df.iloc[:, 1:]

NEW: Use Cross validation to train several different classifiers (takes some time!)

In [22]:
alter_C = True
n_splits = 20
if not alter_C:
    clf = LogisticRegression(max_iter=10000)
    cv = cross_validate(clf, X, y, cv=n_splits, return_estimator=True)
    cv_scores = cv['test_score']
    classifiers = np.array(cv['estimator'])

Alternative: Use GridSearchCV on parameter C (takes some time!)

In [23]:
if alter_C:
    cv_generator = CVGenerator(n_splits = 10)

    # this code is for general purpose train/test evaluation using GridSearchCV
    gridsearch = GridSearchCV(
        clf, param_grid={"C":[1.0 / np.exp(l) for l in np.linspace(0, 3, num=n_splits)]},
        scoring='accuracy',
        cv=cv_generator,
        verbose=1,
        n_jobs=-1
    )

    gridsearch.fit(X,y)
    grid_search_df = pd.DataFrame(gridsearch.cv_results_)

Fitting 10 folds for each of 20 candidates, totalling 200 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:  7.7min
[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed:  8.0min finished


In [25]:
if alter_C:
    # cache a model for each parameter combination, trained on all data
    classifiers = []
    classifier_Cs = []
    for idx, p in tqdm(list(grid_search_df.params.iteritems())):
        model = copy.deepcopy(clf.set_params(**p)).fit(X,y)
        classifiers.append(model)
        classifier_Cs.append(p.items())
    cv_scores = grid_search_df['mean_test_score']
    classifiers = np.array(classifiers)
    # Is it inconsistent to consider score before fitting the whole train set? 
    # But otherwise, it would violate train-test splitting
    # CV + fit is a bit of an overkill here?

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




NEW: Select those classifiers that achieve performance within certain tolerance

In [37]:
#X_test = X[:3]
#for i, est in enumerate(scores['estimator']):
#    print(scores['test_score'][i], est.predict(X_test))
tolerance = 1*np.std(cv_scores)
good_classifiers = classifiers[cv_scores >= np.max(cv_scores) - tolerance]

print(np.max(cv_scores))
print(cv_scores.std())
print(len(good_classifiers))

0.8058666666666667
0.0003363025649689151
7


get predictions

In [38]:
yhat = [clf.predict(X) for clf in good_classifiers]

customize the set of actions and align

In [39]:
action_sets=[]
for clf in good_classifiers:
    ## matrix of features. ActionSet will learn default bounds and step-size.
    A = ActionSet(X)
    ## specify immutable variables
    A['Married'].mutable = False 
    ## can only specify properties for multiple variables using a list
    A[['Age_lt_25', 'Age_in_25_to_40', 'Age_in_40_to_59', 'Age_geq_60']].mutable = False 
    A['EducationLevel'].step_direction = 1  ## force conditional immutability.
    A['EducationLevel'].step_size = 1  ## set step-size to a custom value.
    A['EducationLevel'].step_type = "absolute"  ## force conditional immutability.
    A['EducationLevel'].bounds = (0, 3)
    A['TotalMonthsOverdue'].step_size = 1  ## set step-size to a custom value.
    A['TotalMonthsOverdue'].step_type = "absolute"  ## discretize on absolute values of feature rather than percentile values
    A['TotalMonthsOverdue'].bounds = (0, 100)  ## set bounds to a custom value.
    
    ## tells `ActionSet` which directions each feature should move in to produce positive change.
    A.align(clf)
    action_sets.append(A)

NEW: change inputs according to flipsets (takes some time!)

In [40]:
%%capture
j_clf = 0 # flipset generated for j_th classifier TODO later: iterate j_clf?
k_fs = 0  # k-th flipset is applied; MAYDO later: iterate k_fs?
        # when iterating j_clf, we would probably not filter X here...
xs = copy.deepcopy(X.iloc[np.flatnonzero(yhat[j_clf] <= 0)]).to_numpy()
for i in range(len(xs)):
    fs = Flipset(x = xs[i], action_set = action_sets[j_clf], clf = good_classifiers[j_clf])
    fs.populate(enumeration_type = 'distinct_subsets', total_items = 10)
    for j, fi in enumerate(fs._df['feature_idx'][k_fs]):
        xs[i,fi] = fs._df['x_new'][k][j]

NEW: Measure for how many individuals the adjusted input leads to desirable outcomes for each classifier

In [41]:
flips = []
for clf in good_classifiers:
    ys = clf.predict(xs)
    flips.append(np.mean(ys))
flips

[1.0,
 0.777345017851347,
 0.8617332035053554,
 0.8182408308990587,
 0.8234339500162285,
 0.801363193768257,
 0.5894190197987667]

Run Recourse Audit for each classifier on Training Data (Takes some time!)

In [42]:
audit = {"cost": [], "feasible": []}
for j, clf in enumerate(good_classifiers):
    auditor = RecourseAuditor(action_sets[j], coefficients = clf.coef_[0], intercept = clf.intercept_[0])
    audit_df = auditor.audit(X)  ## matrix of features over which we will perform the audit.
    audit["feasible"].append(audit_df['feasible'].mean())
    audit["cost"].append(audit_df['cost'].mean())
audit

HBox(children=(FloatProgress(value=0.0, max=3042.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=3020.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2945.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2939.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2967.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2965.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=3075.0), HTML(value='')))




{'cost': [0.04641990086296696,
  0.045324120817820154,
  0.04475382944622765,
  0.044295240369545893,
  0.044933442279032,
  0.044667496904708635,
  0.046365966464075786],
 'feasible': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]}