Test for using actionable-recourse, provided on https://github.com/ustunb/actionable-recourse

In order to compare recourse across several similar classifiers, we use cross-validation to fit several logistic regression models (is this the right way?). In the next step, we want to check whether the flipsets generated for one of them also apply to the other classifiers.

In [1]:
import copy

import numpy as np
import pandas as pd
from IPython.display import HTML, display
from IPython.utils.capture import capture_output
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold as CVGenerator
from sklearn.model_selection import cross_validate
import recourse as rs
from recourse.auditor import RecourseAuditor #FIX
from recourse.builder import ActionSet #FIX
from recourse.flipset import Flipset #FIX

import data

In [2]:
# Download the processed credit data set from the actionable-recourse repository.
url = 'https://raw.githubusercontent.com/ustunb/actionable-recourse/master/examples/paper/data/credit_processed.csv'
df = pd.read_csv(url, skipinitialspace=True)

# The first column is used as the target, every remaining column as a feature.
y = df.iloc[:, 0]
X = df.iloc[:, 1:]

NEW: Use Cross validation to train several different classifiers

In [3]:
# Switch between the two ways of obtaining several classifiers:
# False -> one fitted model per cross-validation fold (this cell),
# True  -> the GridSearchCV variant over the parameter C.
alter_C = False
n_splits = 20

if not alter_C:
    clf = LogisticRegression(max_iter=10000)
    # return_estimator=True keeps the model fitted on each fold.
    cv = cross_validate(
        clf,
        X,
        y,
        cv=n_splits,
        return_estimator=True,
    )
    # Array form so that the fitted models can be selected with a boolean mask.
    classifiers = np.array(cv['estimator'])
    cv_scores = cv['test_score']

Alternative: Use GridSearchCV on parameter C
**TODO:** NameError: name 'tqdm_notebook' is not defined

In [4]:
if alter_C:
    # Base estimator for the grid search. It is created here because the
    # cross-validation cell only defines `clf` when `alter_C` is False.
    clf = LogisticRegression(max_iter=10000)

    # `random_state` is only meaningful together with shuffle=True; recent
    # scikit-learn releases reject it otherwise.
    cv_generator = CVGenerator(n_splits=10, shuffle=True, random_state=42)

    # this code is for general purpose train/test evaluation using GridSearchCV
    gridsearch = GridSearchCV(
        clf,
        param_grid={"C": [1.0 / np.exp(exponent) for exponent in np.linspace(0, 3, num=n_splits)]},
        scoring='neg_mean_squared_error',
        return_train_score=True,
        cv=cv_generator,
        verbose=1,
        n_jobs=-1
    )

    gridsearch.fit(X, y)
    grid_search_df = pd.DataFrame(gridsearch.cv_results_)

    # cache a model for each parameter combination, trained on all data
    model_dict = {}
    classifiers = []
    keys = []
    for p in grid_search_df['params']:
        # Fit an independent copy so the base estimator `clf` is never mutated.
        model = copy.deepcopy(clf).set_params(**p).fit(X, y)

        key = '__'.join('%s_%s' % item for item in p.items())
        model_dict[key] = model
        keys.append(key)
        classifiers.append(model)
    # Assign the whole column at once (one key per row, in row order).
    grid_search_df['key'] = keys

    # MAYDO: To actually use this variant, the next part would have to be
    # adapted (it expects `cv_scores` and an array of classifiers).

    # A bare expression inside an `if` block is not rendered, so display explicitly.
    display(grid_search_df)

NEW: Select those classifiers that achieve performance within certain tolerance

In [5]:
# Keep every classifier whose test score lies within two standard deviations
# (of the cross-validation scores) of the best score.
best_score = np.max(cv_scores)
score_std = np.std(cv_scores)
tolerance = 2 * score_std
is_good = cv_scores >= best_score - tolerance
good_classifiers = classifiers[is_good]

print(best_score)
print(score_std)
print(len(good_classifiers))

0.826
0.009025580929287106
9


get predictions

In [6]:
# Predictions on the full feature matrix, one array per selected classifier.
yhat = []
for model in good_classifiers:
    yhat.append(model.predict(X))

customize the set of actions and align

In [7]:
def build_action_set(features, classifier):
    """Create the customised action set for `features`, aligned to `classifier`.

    Parameters
    ----------
    features : feature matrix handed to `ActionSet`.
    classifier : fitted classifier the action set is aligned to.

    Returns
    -------
    The configured and aligned `ActionSet`.
    """
    ## matrix of features. ActionSet will learn default bounds and step-size.
    action_set = ActionSet(features)

    ## specify immutable variables
    action_set['Married'].mutable = False
    ## can only specify properties for multiple variables using a list
    action_set[['Age_lt_25', 'Age_in_25_to_40', 'Age_in_40_to_59', 'Age_geq_60']].mutable = False

    education = action_set['EducationLevel']
    education.step_direction = 1  ## force conditional immutability.
    education.step_size = 1  ## set step-size to a custom value.
    education.step_type = "absolute"  ## discretize on absolute values of feature rather than percentile values
    education.bounds = (0, 3)  ## set bounds to a custom value.

    months_overdue = action_set['TotalMonthsOverdue']
    months_overdue.step_size = 1  ## set step-size to a custom value.
    months_overdue.step_type = "absolute"  ## discretize on absolute values of feature rather than percentile values
    months_overdue.bounds = (0, 100)  ## set bounds to a custom value.

    ## tells `ActionSet` which directions each feature should move in to produce positive change.
    action_set.align(classifier)
    return action_set


# One aligned action set per selected classifier, in the same order as
# `good_classifiers`. The comprehension variable does not leak, so the
# notebook-level `clf` estimator is no longer overwritten by this cell.
action_sets = [build_action_set(X, model) for model in good_classifiers]

Not necessary: testing if action sets were aligned correctly

In [8]:
# Sanity check: report every (classifier index, feature index) pair for which
# the flip direction stored in the action set differs from the sign of the
# corresponding model coefficient.
for j, (aligned_set, model) in enumerate(zip(action_sets, good_classifiers)):
    for i, column in enumerate(X.columns):
        coef_sign = np.sign(model.coef_[:, i])
        if aligned_set._elements[column].flip_direction != coef_sign:
            print("Not well aligned", j, i)

Not necessary: build a flipset for one individual

In [19]:
# Build a flipset for one individual, apply one of its actions and check for
# which of the selected classifiers the prediction changes.
j_clf = 0  # choose one classifier
i = np.flatnonzero(yhat[j_clf] <= 0)[0]  # first individual with negative outcome

# Float copies, so that fractional `x_new` values would not be truncated on
# assignment if the features happen to be stored as integers.
x_old = X.iloc[i].to_numpy(dtype=float, copy=True)
x = x_old.copy()

# The flipset is built from the unmodified point; only `x` is changed below.
fs = Flipset(x=x_old, action_set=action_sets[j_clf], clf=good_classifiers[j_clf])
# fs.populate prints way too much: silence only this call, so that the
# diagnostics and the rendered flipset below stay visible.
with capture_output():
    fs.populate(enumeration_type='distinct_subsets', total_items=10)

print(X.iloc[i])
print(x)

k = 0
# apply the k-th action in the flipset: names, positions and new values of the
# changed features all come from the same row k
changed_features = fs._df['features'][k]
changed_idx = fs._df['feature_idx'][k]
new_values = fs._df['x_new'][k]
for f, fi, new_value in zip(changed_features, changed_idx, new_values):
    print("set", f, "from", x[fi], "to", new_value)
    x[fi] = new_value
print(x)

# check, for which classifiers the outcome has changed
for model in good_classifiers:
    print(model.predict(x_old.reshape(1, -1)), "to", model.predict(x.reshape(1, -1)))

# display flipset
HTML(fs.to_html())

NEW: change inputs according to flipsets

In [None]:
%%capture
j_clf = 0 # flipset generated for j_th classifier TODO later: iterate j_clf?
k_fs = 0  # k-th flipset is applied; MAYDO later: iterate k_fs?
        # when iterating j_clf, we would probably not filter X here...
xs = copy.deepcopy(X.iloc[np.flatnonzero(yhat[j_clf] <= 0)]).to_numpy()
for i in range(len(xs)):
    fs = Flipset(x = xs[i], action_set = action_sets[j_clf], clf = good_classifiers[j_clf])
    fs.populate(enumeration_type = 'distinct_subsets', total_items = 10)
    for j, fi in enumerate(fs._df['feature_idx'][k_fs]):
        xs[i,fi] = fs._df['x_new'][k][j]

NEW: Measure for how many individuals the adjusted input leads to desirable outcomes for each classifier

In [17]:
# Mean predicted label on the adjusted inputs, one value per selected
# classifier (the share of positive outcomes, assuming labels are 0/1).
flips = [np.mean(model.predict(xs)) for model in good_classifiers]
flips

[1.0,
 0.5926910299003322,
 0.6853820598006645,
 0.48372093023255813,
 0.4601328903654485,
 0.6863787375415282,
 0.5395348837209303,
 0.6790697674418604,
 0.5877076411960133]

**TODO:** Adapt from here to the end: (How) do we want to use the auditor?

Run Recourse Audit on Training Data

In [15]:
# Audit recourse using the action set and the linear model (first row of
# coefficients, first intercept) of the j_clf-th selected classifier.
audited_clf = good_classifiers[j_clf]
auditor = RecourseAuditor(
    action_sets[j_clf],
    coefficients=audited_clf.coef_[0],
    intercept=audited_clf.intercept_[0],
)
audit_df = auditor.audit(X)  ## matrix of features over which we will perform the audit.

HBox(children=(FloatProgress(value=0.0, max=2971.0), HTML(value='')))




print mean feasibility and cost of recourse

In [16]:
# Column means of the audit result: first 'feasible', then 'cost'.
for column in ('feasible', 'cost'):
    print(audit_df[column].mean())

1.0
0.044684389267020806


**TODO**: Generate flipsets for each good classifier and test whether they change the outcome when the other good classifiers are used.