Test for using actionable-recourse, provided at https://github.com/ustunb/actionable-recourse

In [15]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_validate
import recourse as rs
from recourse.builder import ActionSet #FIX
from recourse.flipset import Flipset #FIX
from recourse.auditor import RecourseAuditor #FIX

import data

In [4]:
# Fetch the processed credit dataset from the actionable-recourse repository
# (examples/paper/data).
url = 'https://raw.githubusercontent.com/ustunb/actionable-recourse/master/examples/paper/data/credit_processed.csv'
df = pd.read_csv(url, skipinitialspace=True)

# Column 0 is used as the target; every remaining column is used as a feature.
X = df.iloc[:, 1:]
y = df.iloc[:, 0]

NEW: Use cross-validation to train several different classifiers

In [20]:
# Cross-validate a logistic regression and keep the model fitted on each fold,
# so that several candidate classifiers are available afterwards.
clf = LogisticRegression(max_iter=10000)
cv = cross_validate(
    estimator=clf,
    X=X,
    y=y,
    cv=20,                  # 20 folds -> 20 fitted estimators
    return_estimator=True,  # fitted per-fold models end up in cv['estimator']
)

NEW: Select the classifiers whose test score is within a certain tolerance of the best score

In [33]:
# Keep every fold's classifier whose test score lies within two standard
# deviations (of the fold scores) of the best fold score.
cv_scores = cv['test_score']
tolerance = 2 * cv_scores.std()
good_classifiers = np.array(cv['estimator'])[
    cv_scores >= cv_scores.max() - tolerance
]

# Report, one per line: best score, score spread, number of classifiers kept.
print(cv_scores.max(), cv_scores.std(), len(good_classifiers), sep='\n')

0.826
0.009025580929287106
9


train a classifier

In [5]:
# Fit a single logistic regression on the full dataset and record its
# predictions on that same data.
# NOTE(review): the cross-validation cell uses max_iter=10000 while this one
# uses 1000 -- confirm the difference is intentional.
clf = LogisticRegression(max_iter=1000).fit(X, y)  #FIX
yhat = clf.predict(X)

customize the set of actions

In [4]:
# Build the action set from the feature matrix; the per-feature settings in
# the following cells are all applied to `A`.
A = ActionSet(X)  ## matrix of features. ActionSet will learn default bounds and step-size.

specify immutable variables

In [5]:
# Flag the 'Married' feature as immutable (mutable = False).
A['Married'].mutable = False

can only specify properties for multiple variables using a list

In [6]:
# Flag all four age columns as immutable in one assignment by indexing the
# action set with a list of feature names.
A[['Age_lt_25', 'Age_in_25_to_40', 'Age_in_40_to_59', 'Age_geq_60']].mutable = False

education level

In [7]:
# Custom action settings for the 'EducationLevel' feature.
A['EducationLevel'].step_direction = 1  ## force conditional immutability.
A['EducationLevel'].step_size = 1  ## set step-size to a custom value.
A['EducationLevel'].step_type = "absolute"  ## discretize on absolute values of feature rather than percentile values
A['EducationLevel'].bounds = (0, 3)  ## set bounds to a custom value.

In [8]:
# Custom action settings for the 'TotalMonthsOverdue' feature.
A['TotalMonthsOverdue'].step_size = 1  ## set step-size to a custom value.
A['TotalMonthsOverdue'].step_type = "absolute"  ## discretize on absolute values of feature rather than percentile values
A['TotalMonthsOverdue'].bounds = (0, 100)  ## set bounds to a custom value.

get model coefficients and align

In [9]:
# Align the action set with the classifier.
# NOTE(review): presumably `clf` must already be fitted when this runs -- confirm.
A.align(clf)  ## tells `ActionSet` which directions each feature should move in to produce positive change.

Get one individual

In [10]:
# Positional index of the first row whose predicted label is <= 0.
# Raises IndexError if no prediction satisfies yhat <= 0.
i = np.flatnonzero(yhat <= 0)[0]

build a flipset for one individual

In [11]:
%%capture 
# Discard the output of this cell (fs.populate prints way too much).
# x is passed as a one-element list wrapping the selected row X.iloc[i].
fs = Flipset(x = [X.iloc[i]], action_set = A, clf = clf) #FIX
# Request 10 items, enumerated as distinct feature subsets.
fs.populate(enumeration_type = 'distinct_subsets', total_items = 10)

In [12]:
# Print the populated flipset as LaTeX table markup.
print(fs.to_latex())

\begin{tabular}{rlccc}
\toprule
 &          \textsc{Feature Subset} &  \textsc{Current Values} &               &  \textsc{Required Values} \\
0    &               \textit{TotalMonthsOverdue} &                      7.0 &  $\longrightarrow$ &                       3.0 \\
1    &               \textit{TotalMonthsOverdue} &                      7.0 &  $\longrightarrow$ &                       4.0 \\
1    &             \textit{MostRecentBillAmount} &                   2010.0 &  $\longrightarrow$ &                    1926.0 \\
1    &     \textit{MaxBillAmountOverLast6Months} &                   2060.0 &  $\longrightarrow$ &                    2508.0 \\
2    &               \textit{TotalMonthsOverdue} &                      7.0 &  $\longrightarrow$ &                       4.0 \\
2    &          \textit{MostRecentPaymentAmount} &                    100.0 &  $\longrightarrow$ &                     105.0 \\
2    &     \textit{MaxBillAmountOverLast6Months} &                   2060.0 &  $\longright

Run Recourse Audit on Training Data

In [13]:
# Build the auditor from the action set and the linear model's parameters:
# clf.coef_[0] is the first row of the fitted coefficients and
# clf.intercept_[0] is the first intercept entry.
auditor = RecourseAuditor(A, coefficients = clf.coef_[0], intercept = clf.intercept_[0])
# The result is used as a table with 'feasible' and 'cost' columns in the next cell.
audit_df = auditor.audit(X)  ## matrix of features over which we will perform the audit.

HBox(children=(FloatProgress(value=0.0, max=2938.0), HTML(value='')))




print mean feasibility and cost of recourse

In [14]:
# Summarise the audit, one value per line: mean of the 'feasible' column,
# then mean of the 'cost' column.
print(audit_df['feasible'].mean(), audit_df['cost'].mean(), sep='\n')

1.0
0.04416156067534439
