In [1]:
from sklearn.datasets import load_iris, load_boston
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from nice.explainers import NICE
import numpy as np
import pandas as pd

In [2]:
# Reload imported modules automatically before each cell executes, so edits to
# local source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# Load iris as a single DataFrame (four feature columns plus the "target" column).
df_iris = load_iris(as_frame=True)["frame"]
df_iris.head(5)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [4]:
# Show the column dtypes and non-null counts of the loaded frame.
df_iris.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
 4   target             150 non-null    int32  
dtypes: float64(4), int32(1)
memory usage: 5.4 KB


In [5]:
# Label column; every other column of the frame is treated as a continuous feature.
outcome_name = "target"
target = df_iris[outcome_name]
continuous_features_iris = [col for col in df_iris.columns if col != outcome_name]

In [6]:
# Feature matrix: the iris frame without the label column.
datasetX = df_iris.drop(columns=[outcome_name])

In [7]:
# Convert to plain NumPy arrays (original note: "only supports arrays atm").
X = datasetX.to_numpy()
y = target.to_numpy()

# Seed the split so that the rows landing in X_train (e.g. X_train[5], which is
# inspected further down) are the same on every "Restart & Run All".
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [8]:
# Column positions in X by feature type: iris has no categorical columns,
# all four columns are numeric.
num_feat = list(range(4))
cat_feat = list()

In [9]:
# Single estimator: scale the numeric columns, one-hot encode the (empty) set of
# categorical columns, then classify with a random forest.
clf = Pipeline(
    steps=[
        (
            "PP",
            ColumnTransformer(
                transformers=[
                    ("num", StandardScaler(), num_feat),
                    ("cat", OneHotEncoder(handle_unknown="ignore"), cat_feat),
                ]
            ),
        ),
        ("RF", RandomForestClassifier()),
    ]
)

clf.fit(X_train, y_train)

Pipeline(steps=[('PP',
                 ColumnTransformer(transformers=[('num', StandardScaler(),
                                                  [0, 1, 2, 3]),
                                                 ('cat',
                                                  OneHotEncoder(handle_unknown='ignore'),
                                                  [])])),
                ('RF', RandomForestClassifier())])

In [10]:
def predict_fn_class0(x):
    """Collapse the multi-class probabilities of ``clf`` into "rest vs. class 0".

    Parameters
    ----------
    x : array-like
        Samples in whatever form ``clf.predict_proba`` accepts.

    Returns
    -------
    numpy.ndarray of shape (n_samples, 2)
        Column 0 is the largest probability among all classes other than 0,
        column 1 is the probability of class 0.
    """
    proba = np.asarray(clf.predict_proba(x))
    # Vectorised over rows; dropping column 0 leaves "all other classes".
    best_other = np.delete(proba, 0, axis=1).max(axis=1)
    return np.column_stack((best_other, proba[:, 0]))

In [11]:
def predict_fn_class1(x):
    """Collapse the multi-class probabilities of ``clf`` into "rest vs. class 1".

    Parameters
    ----------
    x : array-like
        Samples in whatever form ``clf.predict_proba`` accepts.

    Returns
    -------
    numpy.ndarray of shape (n_samples, 2)
        Column 0 is the largest probability among all classes other than 1,
        column 1 is the probability of class 1.
    """
    proba = np.asarray(clf.predict_proba(x))
    # Vectorised over rows; dropping column 1 leaves "all other classes".
    best_other = np.delete(proba, 1, axis=1).max(axis=1)
    return np.column_stack((best_other, proba[:, 1]))

In [12]:
def predict_fn_class2(x):
    """Collapse the multi-class probabilities of ``clf`` into "rest vs. class 2".

    Parameters
    ----------
    x : array-like
        Samples in whatever form ``clf.predict_proba`` accepts.

    Returns
    -------
    numpy.ndarray of shape (n_samples, 2)
        Column 0 is the largest probability among all classes other than 2,
        column 1 is the probability of class 2.
    """
    proba = np.asarray(clf.predict_proba(x))
    # Vectorised over rows; dropping column 2 leaves "all other classes".
    best_other = np.delete(proba, 2, axis=1).max(axis=1)
    return np.column_stack((best_other, proba[:, 2]))

In the run recorded below, `X_train[5]` is predicted as class 1 with probability 0.92. These outputs come from one particular random train/test split, so a rerun may place a different data point at index 5 and produce different numbers.

In [22]:
# Raw class probabilities for training row 5, passed as a one-row 2-D array.
clf.predict_proba(X_train[5].reshape(1, -1))

array([[0.  , 0.92, 0.08]])

In [24]:
# Two-column "rest vs. class 0" scores for the same row.
predict_fn_class0(X_train[5].reshape(1, -1))

array([[0.92, 0.  ]])

In [25]:
# Two-column "rest vs. class 1" scores for the same row.
predict_fn_class1(X_train[5].reshape(1, -1))

array([[0.08, 0.92]])

In [26]:
# Two-column "rest vs. class 2" scores for the same row.
predict_fn_class2(X_train[5].reshape(1, -1))

array([[0.92, 0.08]])

In [27]:
def get_counterfactuals_for_desired_class(desired_class=0):
    """Create and fit a NICE explainer on the "rest vs. class" scores of one class.

    The explainer is built with ``optimization='sparsity'`` and
    ``justified_cf=False`` and fitted on the notebook-level ``X_train``,
    ``cat_feat`` and ``num_feat`` with ``y_train=None``. Its prediction function
    is the two-column wrapper (``predict_fn_class0`` / ``1`` / ``2``) that
    matches ``desired_class``.

    Parameters
    ----------
    desired_class : int or length-1 array, default 0
        Class index (0, 1 or 2) selecting the wrapper. A length-1 array such as
        the output of ``clf.predict`` on a single row is accepted and unwrapped.

    Returns
    -------
    NICE
        The fitted explainer (not the counterfactuals themselves; call
        ``.explain(...)`` on the result).

    Raises
    ------
    ValueError
        If ``desired_class`` is not a single value in {0, 1, 2}. Previously any
        unrecognised value silently selected the class-2 wrapper.
    """
    predict_fns = {
        0: predict_fn_class0,
        1: predict_fn_class1,
        2: predict_fn_class2,
    }

    # Unwrap NumPy scalars / length-1 arrays to a plain Python value so the
    # lookup does not depend on the truth value of an array comparison.
    class_label = np.asarray(desired_class).item()
    if class_label not in predict_fns:
        raise ValueError(
            f"desired_class must be one of {sorted(predict_fns)}, got {desired_class!r}"
        )

    explainer = NICE(optimization='sparsity',
                     justified_cf=False)
    explainer.fit(X_train=X_train,
                  predict_fn=predict_fns[class_label],
                  y_train=None,
                  cat_feat=cat_feat,
                  num_feat=num_feat)
    return explainer

In [29]:
# Instance to explain: training row 5 as a one-row 2-D array.
to_explain = X_train[5].reshape(1, -1)

# NOTE: despite its name, `desired_class` holds the class the model currently
# predicts for this row (a length-1 array); the explainer is fitted on the
# matching "rest vs. that class" scores.
desired_class = clf.predict(to_explain)
load_cf_class = get_counterfactuals_for_desired_class(desired_class)

CF = load_cf_class.explain(to_explain)

In [30]:
# Feature values of the instance that was passed to the explainer.
to_explain

array([[6.9, 3.1, 4.9, 1.5]])

In [31]:
# Counterfactual returned by the explainer for `to_explain`.
CF

array([[6.9, 3.1, 5.8, 1.5]])

In [32]:
# Class the model predicts for the original instance.
clf.predict(to_explain)

array([1])

In [33]:
# Class the model predicts for the counterfactual.
clf.predict(CF)

array([2])