In [1]:
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

import pandas as pd
import time
import metrics

# counterfactuals libraries
import dice_ml
from nice import NICE

import tensorflow as tf
tf.get_logger().setLevel(40) 
tf.compat.v1.disable_v2_behavior() 
from alibi.explainers import CounterfactualProto
from alibi.utils import ohe_to_ord, ord_to_ohe

2023-10-03 18:10:18.838510: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-10-03 18:10:19.294982: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-10-03 18:10:19.299769: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as notebook_tqdm


# 引用数据并做一些预处理

In [2]:
data = pd.read_csv('data/german.data', delimiter=' ', header=None)
features = data.iloc[:, :-1]
labels = data.iloc[:, -1].values - 1 # 1 is replaced by 0 (好) and 2 is replaced by 1(坏)

# 文本特征index
categorical_cols = [0, 2, 3, 5, 6, 8, 9, 11, 13, 14, 16, 18, 19]

# 每个文本特征的取值范围
attribute_mappings = {
    0: {"A11": "<0 DM", "A12": "0-200 DM", "A13": ">=200 DM", "A14": "noaccount"},
    2: {"A30": "no credits", "A31": "all paid", "A32": "paid till", "A33": "late pay", "A34": "critical"},
    3: {"A40": "car(new)", "A41": "car(used)", "A42": "furniture", "A43": "radio/tv", "A44": "appliances", "A45": "repairs", "A46": "education", "A47": "vacation?", "A48": "retrain", "A49": "business", "A410": "others"},
    5: {"A61": "<100 DM", "A62": "100-500DM", "A63": "500-1kDM", "A64": ">=1k DM", "A65": "unknown"},
    6: {"A71": "unemploy", "A72": "<1 year", "A73": "1-3yrs", "A74": "4-6yrs", "A75": ">=7yrs"},
    8: {"A91": "male:div", "A92": "fem:div/mar", "A93": "male:single", "A94": "male:mar", "A95": "fem:single"},
    9: {"A101": "none", "A102": "co-app", "A103": "guarantor"},
    11: {"A121": "realest", "A122": "life ins", "A123": "car", "A124": "unknown"},
    13: {"A141": "bank", "A142": "stores", "A143": "none"},
    14: {"A151": "rent", "A152": "own", "A153": "free"},
    16: {"A171": "unemploy", "A172": "unskilled", "A173": "skilled", "A174": "mgmt/self"},
    18: {"A191": "none", "A192": "yes"},
    19: {"A201": "yes", "A202": "no"}
    }

In [3]:
# 将特征值映射到数据集上
for col, mapping in attribute_mappings.items():
    for key, value in mapping.items():
        mask = features.iloc[:, col] == key
        features.loc[mask, col] = value
        
# 将文本特征保存起来
category_map_tmp = {}
for col in categorical_cols:
    le = LabelEncoder()
    features[col] = le.fit_transform(features[col].values)
    category_map_tmp[col] = list(le.classes_)
    
# Convert features to numpy array for consistency with the other fetch function
features = features.values

In [4]:
target_name = 'Label'

In [5]:
# 要进行counterfactual的个数
N_CF = 20

In [6]:
# Feature and target names
feature_names = [
    'ExistingChecking', 'Duration', 'CreditHistory', 'Purpose', 'CreditAmount', 'SavingsAccount',
    'EmploymentSince', 'InstallmentRatePercentage', 'PersonalStatusSex', 'OtherDebtors','PresentResidenceSince',
    'Property', 'Age', 'OtherInstallmentPlans', 'Housing', 'ExistingCreditsAtBank', 'Job', 
    'PeopleLiableToProvideMaintenance', 'Telephone', 'ForeignWorker'
]
    
    
target_names = ['Good', 'Bad']

In [7]:
features.shape, labels.shape

((1000, 20), (1000,))

In [8]:
data.columns = feature_names + ["Label"]
data['Label'] = data['Label'] - 1
df = data
df.head()

Unnamed: 0,ExistingChecking,Duration,CreditHistory,Purpose,CreditAmount,SavingsAccount,EmploymentSince,InstallmentRatePercentage,PersonalStatusSex,OtherDebtors,...,Property,Age,OtherInstallmentPlans,Housing,ExistingCreditsAtBank,Job,PeopleLiableToProvideMaintenance,Telephone,ForeignWorker,Label
0,A11,6,A34,A43,1169,A65,A75,4,A93,A101,...,A121,67,A143,A152,2,A173,1,A192,A201,0
1,A12,48,A32,A43,5951,A61,A73,2,A92,A101,...,A121,22,A143,A152,1,A173,1,A191,A201,1
2,A14,12,A34,A46,2096,A61,A74,2,A93,A101,...,A121,49,A143,A152,1,A172,2,A191,A201,0
3,A11,42,A32,A42,7882,A61,A74,2,A93,A103,...,A122,45,A143,A153,1,A173,2,A191,A201,0
4,A11,24,A33,A40,4870,A61,A73,3,A93,A101,...,A124,53,A143,A153,2,A173,2,A191,A201,1


In [9]:
# 选择文本特征和数值特征列
categorical_features = [feature_names[i] for i in category_map_tmp.keys()]
categorical_ids = list(category_map_tmp.keys())

numerical_features = [name for i, name in enumerate(feature_names) if i not in category_map_tmp.keys()]
numerical_ids = [i for i in range(len(feature_names)) if i not in category_map_tmp.keys()]

# X, Y = features, labels
X, Y = df.iloc[:, :-1], df.iloc[:, -1]
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=7)

In [10]:
categorical_ids, numerical_ids

([0, 2, 3, 5, 6, 8, 9, 11, 13, 14, 16, 18, 19], [1, 4, 7, 10, 12, 15, 17])

# DICE 方法

In [11]:
# Define numerical standard scaler.
num_transf = StandardScaler()

# Define categorical one-hot encoder.
cat_transf = OneHotEncoder(
    categories=[range(len(x)) for x in category_map_tmp.values()],
    handle_unknown="ignore"
)

# Define column transformer
preprocessor = ColumnTransformer(
    transformers=[
        ("cat", cat_transf, categorical_ids),
        ("num", num_transf, numerical_ids),
    ],
    sparse_threshold=0
)
# Fit preprocessor.
preprocessor.fit(X_train)

# Preprocess train and test dataset.
X_train_ohe = preprocessor.transform(X_train)

# Select one of the below classifiers.
clf = RandomForestClassifier(n_estimators=100, random_state=42)

# Fit the classifier.
clf.fit(X_train_ohe, Y_train)
# Define prediction function.
predictor = lambda x: clf.predict_proba(preprocessor.transform(x))

print(classification_report(y_true=Y_test, y_pred=predictor(X_test).argmax(axis=1)))

              precision    recall  f1-score   support

           0       0.80      0.91      0.85       149
           1       0.55      0.33      0.41        51

    accuracy                           0.76       200
   macro avg       0.67      0.62      0.63       200
weighted avg       0.73      0.76      0.74       200



In [12]:
d = dice_ml.Data(dataframe=df, continuous_features=numerical_features, outcome_name=target_name)

# 使用sklearn作为backend
backend = 'sklearn'

# 将sklearn的预测器包装成dice_ml的预测器
class ModelWrapper:
    def __init__(self, predictor_func):
        self.predictor_func = predictor_func
    
    def predict_proba(self, instances):
        return self.predictor_func(instances)
    
    def predict(self, instances):
       
        prob_preds = self.predictor_func(instances)
    
        return np.argmax(prob_preds, axis=1)

model_wrapper = ModelWrapper(predictor)

m = dice_ml.Model(model=model_wrapper, backend=backend)

In [13]:
# 计算proximity
dice_method = "random"

dice_result = []

for i in range(N_CF):
    counterfactuals_list = []
    exp = dice_ml.Dice(d, m, method=dice_method)
    query_instance_df = pd.DataFrame([X_test.iloc[i]])
    dice_exp = exp.generate_counterfactuals(query_instance_df, total_CFs=5, desired_class="opposite")
    
    final_cfs_df = dice_exp.cf_examples_list[0].final_cfs_df
    
    counterfactuals_list.append((query_instance_df, final_cfs_df))
    
    metrics_dice = metrics.calculate_metrics(
        counterfactuals_list, df,
        numerical_features, categorical_features,
        preprocessor, dice_method,target_name
    )
    dice_result.append(metrics_dice)

  0%|          | 0/1 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  1.70it/s]
100%|██████████| 1/1 [00:00<00:00,  1.81it/s]
100%|██████████| 1/1 [00:00<00:00,  1.96it/s]
100%|██████████| 1/1 [00:00<00:00,  2.23it/s]
100%|██████████| 1/1 [00:00<00:00,  2.11it/s]
100%|██████████| 1/1 [00:00<00:00,  2.04it/s]
100%|██████████| 1/1 [00:00<00:00,  2.26it/s]
100%|██████████| 1/1 [00:00<00:00,  2.02it/s]
100%|██████████| 1/1 [00:00<00:00,  2.60it/s]
100%|██████████| 1/1 [00:00<00:00,  2.62it/s]
100%|██████████| 1/1 [00:00<00:00,  1.97it/s]
100%|██████████| 1/1 [00:00<00:00,  2.21it/s]
100%|██████████| 1/1 [00:00<00:00,  2.36it/s]
100%|██████████| 1/1 [00:00<00:00,  1.57it/s]
100%|██████████| 1/1 [00:00<00:00,  1.43it/s]
100%|██████████| 1/1 [00:00<00:00,  1.92it/s]
100%|██████████| 1/1 [00:00<00:00,  2.04it/s]
100%|██████████| 1/1 [00:00<00:00,  1.81it/s]
100%|██████████| 1/1 [00:00<00:00,  1.95it/s]
100%|██████████| 1/1 [00:00<00:00,  1.21it/s]


In [14]:
# calculate the average of the metrics
dice_avg_proximity_cont = np.mean([x['avg_proximity_cont'] for x in dice_result])
dice_avg_proximity_cat = np.mean([x['avg_proximity_cat'] for x in dice_result])
dice_avg_sparsity = np.mean([x['avg_sparsity'] for x in dice_result])

print(f"Average proximity for continuous features: {dice_avg_proximity_cont}")
print(f"Average proximity for categorical features: {dice_avg_proximity_cat}")
print(f"Average sparsity: {dice_avg_sparsity}")

Average proximity for continuous features: 0.0
Average proximity for categorical features: 0.0537037037037037
Average sparsity: 2.9


In [15]:
# validate and time

# Select some positive examples.
X_negative = X_test[np.argmax(predictor(X_test), axis=1) == 0]
query_instance_df = pd.DataFrame(X_negative[0:N_CF], columns=feature_names)

counterfactuals_list = []
dice_time_list = []
dice_validity_list = []

for _, instance in query_instance_df.iterrows():
    instance_df = pd.DataFrame(instance).T
    
    start_time = time.time()
    
    dice_exp = exp.generate_counterfactuals(instance_df, total_CFs=5, desired_class="opposite")
    
    time_taken = time.time() - start_time
    dice_time_list.append(time_taken)
    
    # dice_exp[0].cf_examples_list[0].final_cfs_df
    if hasattr(dice_exp, 'cf_examples_list') and dice_exp.cf_examples_list[0]:
        cf_df = dice_exp.cf_examples_list[0].final_cfs_df
        if cf_df is not None:
            counterfactuals_list.append((instance_df, cf_df))
            dice_validity_list.append(1)
        else:
            counterfactuals_list.append((instance_df, None))
            dice_validity_list.append(0)
    else:
        counterfactuals_list.append((instance_df, None))
        dice_validity_list.append(0)

dice_avg_time = np.mean(dice_time_list)
dice_avg_validity = np.mean(dice_validity_list)

print("Average Time Taken per instance:", dice_avg_time)
print("Average Validity:", dice_avg_validity) 

  0%|          | 0/1 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  2.34it/s]
100%|██████████| 1/1 [00:00<00:00,  1.71it/s]
100%|██████████| 1/1 [00:00<00:00,  2.38it/s]
100%|██████████| 1/1 [00:00<00:00,  2.60it/s]
100%|██████████| 1/1 [00:00<00:00,  2.35it/s]
100%|██████████| 1/1 [00:00<00:00,  2.42it/s]
100%|██████████| 1/1 [00:00<00:00,  3.08it/s]
100%|██████████| 1/1 [00:00<00:00,  2.70it/s]
100%|██████████| 1/1 [00:00<00:00,  2.73it/s]
100%|██████████| 1/1 [00:00<00:00,  2.74it/s]
100%|██████████| 1/1 [00:00<00:00,  2.28it/s]
100%|██████████| 1/1 [00:00<00:00,  1.88it/s]
100%|██████████| 1/1 [00:00<00:00,  1.83it/s]
100%|██████████| 1/1 [00:00<00:00,  1.38it/s]
100%|██████████| 1/1 [00:00<00:00,  1.58it/s]
100%|██████████| 1/1 [00:00<00:00,  2.29it/s]
100%|██████████| 1/1 [00:00<00:00,  2.08it/s]
100%|██████████| 1/1 [00:00<00:00,  2.14it/s]
100%|██████████| 1/1 [00:00<00:00,  2.12it/s]
100%|██████████| 1/1 [00:00<00:00,  2.53it/s]

Average Time Taken per instance: 0.4659137845039368
Average Validity: 1.0





In [16]:
dice_exp.visualize_as_dataframe()

Query instance (original outcome : 0)


Unnamed: 0,ExistingChecking,Duration,CreditHistory,Purpose,CreditAmount,SavingsAccount,EmploymentSince,InstallmentRatePercentage,PersonalStatusSex,OtherDebtors,...,Property,Age,OtherInstallmentPlans,Housing,ExistingCreditsAtBank,Job,PeopleLiableToProvideMaintenance,Telephone,ForeignWorker,Label
0,A12,11,A34,A40,1322,A64,A73,4,A92,A101,...,A123,40,A143,A152,2,A173,1,A191,A201,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,ExistingChecking,Duration,CreditHistory,Purpose,CreditAmount,SavingsAccount,EmploymentSince,InstallmentRatePercentage,PersonalStatusSex,OtherDebtors,...,Property,Age,OtherInstallmentPlans,Housing,ExistingCreditsAtBank,Job,PeopleLiableToProvideMaintenance,Telephone,ForeignWorker,Label
0,A12,42.0,A34,A40,1322.0,A64,A73,4,A92,A101,...,A123,40,A143,A152,2.0,A173,2.0,A191,A201,1
1,A12,69.0,A34,A40,1322.0,A64,A73,4,A92,A101,...,A123,40,A143,A152,4.0,A173,1.0,A191,A201,1
2,A12,54.0,A34,A40,1322.0,A64,A73,4,A92,A101,...,A123,40,A143,A152,2.0,A173,1.0,A192,A201,1
3,A12,53.0,A34,A40,1322.0,A64,A73,4,A92,A101,...,A123,40,A143,A152,2.0,A173,1.0,A191,A202,1
4,A12,11.0,A34,A40,15618.0,A64,A73,4,A92,A101,...,A123,40,A143,A152,2.0,A173,1.0,A191,A201,0


# NICE方法

In [17]:
X_nice = X.values
y_nice = Y.values

X_train_nice, X_test_nice, y_train_nice, y_test_nice = train_test_split(X_nice, y_nice, test_size=0.2, random_state=42)

clf_nice = Pipeline([
    ('preprocessor', ColumnTransformer(
        [
            ('num', num_transf , numerical_ids),
            ('cat', cat_transf, categorical_ids)
        ]
    )),
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))]
)

clf_nice.fit(X_train_nice, y_train_nice)

In [18]:
predict_fn_nice = lambda x: clf_nice.predict_proba(x)

NICE_explainer = NICE(
    X_train=X_train_nice,
    predict_fn=predict_fn_nice,
    y_train=y_train_nice,
    cat_feat=categorical_ids,
    num_feat=numerical_ids
)

In [19]:
# 计算proximity

nice_result = []

for i in range(N_CF):
    counterfactuals_list_nice = []
    
    query_instance_df_nice = pd.DataFrame(X_test_nice[i:i+1,:], columns=feature_names)
    nice_exp = NICE_explainer.explain(X_test_nice[i:i+1,:])[0]

    final_cfs_nice = pd.DataFrame([nice_exp], columns=feature_names)
    final_cfs_nice[target_name] = clf_nice.predict(final_cfs_nice)
    
    counterfactuals_list_nice.append((query_instance_df_nice, final_cfs_nice))
    
    metrics_nice = metrics.calculate_metrics(
        counterfactuals_list_nice, df,
        numerical_features, categorical_features,
        preprocessor,'nice',target_name
    )
    nice_result.append(metrics_nice)



In [20]:
counterfactuals_list_nice

[(  ExistingChecking Duration CreditHistory Purpose CreditAmount SavingsAccount  \
  0              A11     15.0           A33     A42       3643.0            A61   
  
    EmploymentSince InstallmentRatePercentage PersonalStatusSex OtherDebtors  \
  0             A75                       1.0               A92         A101   
  
    PresentResidenceSince Property   Age OtherInstallmentPlans Housing  \
  0                   4.0     A122  27.0                  A143    A152   
  
    ExistingCreditsAtBank   Job PeopleLiableToProvideMaintenance Telephone  \
  0                   2.0  A172                              1.0      A191   
  
    ForeignWorker  
  0          A201  ,
    ExistingChecking  Duration CreditHistory Purpose  CreditAmount  \
  0              A11      30.0           A33     A42        3108.0   
  
    SavingsAccount EmploymentSince  InstallmentRatePercentage PersonalStatusSex  \
  0            A61             A75                        2.0               A92   
  
    O

In [21]:
# calculate the average of the metrics
nice_avg_proximity_cont = np.mean([x['avg_proximity_cont'] for x in nice_result])
nice_avg_proximity_cat = np.mean([x['avg_proximity_cat'] for x in nice_result])
nice_avg_sparsity = np.mean([x['avg_sparsity'] for x in nice_result])

print(f"Average proximity for continuous features: {nice_avg_proximity_cont}")
print(f"Average proximity for categorical features: {nice_avg_proximity_cat}")
print(f"Average sparsity: {nice_avg_sparsity}")

Average proximity for continuous features: 0.0
Average proximity for categorical features: 0.03518518518518519
Average sparsity: 1.9


In [22]:
# validate and time

X_negative_nice = X_test_nice[np.argmax(predict_fn_nice(X_test_nice), axis=1) == 0]
query_instance_df_nice = pd.DataFrame(X_negative_nice[0:N_CF], columns=feature_names)

counterfactuals_list_nice = []
nice_time_list = []
nice_validity_list = []

for _, instance in query_instance_df_nice.iterrows():
    instance_df = pd.DataFrame(instance).T
    
    start_time = time.time()
    
    nice_exp = NICE_explainer.explain(instance_df.values)
    
    time_taken = time.time() - start_time
    nice_time_list.append(time_taken)
    
    cf_df = pd.DataFrame([nice_exp[0]], columns=feature_names)
    cf_df_prob = clf_nice.predict_proba(cf_df)
    if cf_df_prob[0][1] > 0.55:
        counterfactuals_list_nice.append((instance_df, cf_df))
        nice_validity_list.append(1)
    else:
        counterfactuals_list_nice.append((instance_df, None))
        nice_validity_list.append(0) 
        
nice_avg_time = np.mean(nice_time_list)
nice_avg_validity = np.mean(nice_validity_list)

print("Average Time Taken per instance:", nice_avg_time)
print("Average Validity:", nice_avg_validity)



Average Time Taken per instance: 0.09855170249938965
Average Validity: 0.8




# PROTOTYPE 方法

In [23]:
categorical_ids

[0, 2, 3, 5, 6, 8, 9, 11, 13, 14, 16, 18, 19]

In [24]:
data_perm = np.random.permutation(np.c_[features, labels])
X_alibi = data_perm[:,:-1]
y_alibi = data_perm[:,-1]

idx = 800
y_train_alibi, y_test_alibi = y_alibi[:idx], y_alibi[idx:]

# 将文本特征和数值特征重新排列，使得文本特征在前，数值特征在后
X_alibi = np.c_[X_alibi[:,0], X_alibi[:,2:4], X_alibi[:,5:7],
                X_alibi[:,8:10], X_alibi[:,11], X_alibi[:,13:15],
                X_alibi[:, 16], X_alibi[:, 18:20], X_alibi[:,1],
                X_alibi[:,4], X_alibi[:,7], X_alibi[:,10],X_alibi[:,12],
                X_alibi[:,15], X_alibi[:,17]]

feature_names_alibi = feature_names[0:1] + feature_names[2:4] + feature_names[5:7] + feature_names[8:10] + feature_names[11:12] + feature_names[13:15] + feature_names[16:17] + feature_names[18:20] + feature_names[1:2] + feature_names[4:5] + feature_names[7:8] + feature_names[10:11] + feature_names[12:13] + feature_names[15:16] + feature_names[17:18]

print(feature_names_alibi)

['ExistingChecking', 'CreditHistory', 'Purpose', 'SavingsAccount', 'EmploymentSince', 'PersonalStatusSex', 'OtherDebtors', 'Property', 'OtherInstallmentPlans', 'Housing', 'Job', 'Telephone', 'ForeignWorker', 'Duration', 'CreditAmount', 'InstallmentRatePercentage', 'PresentResidenceSince', 'Age', 'ExistingCreditsAtBank', 'PeopleLiableToProvideMaintenance']


In [25]:
# 将文本特征进行one-hot编码,并记录其位置与长度

category_map = {}
for i, (_, v) in enumerate(category_map_tmp.items()):
    category_map[i] = v 
    
cat_vars_ord = {}
n_categories = len(list(category_map.keys()))
for i in range(n_categories):
    cat_vars_ord[i] = len(np.unique(X_alibi[:, i]))
print(cat_vars_ord)

cat_vars_ohe = ord_to_ohe(X_alibi, cat_vars_ord)[1]
print(cat_vars_ohe)

{0: 4, 1: 5, 2: 10, 3: 5, 4: 5, 5: 4, 6: 3, 7: 4, 8: 3, 9: 3, 10: 4, 11: 2, 12: 2}
{0: 4, 4: 5, 9: 10, 19: 5, 24: 5, 29: 4, 33: 3, 36: 4, 40: 3, 43: 3, 46: 4, 50: 2, 52: 2}


In [26]:
categorical_features_alibi = feature_names_alibi[0:len(categorical_ids)]
numerical_features_alibi = feature_names_alibi[len(categorical_ids):]

In [27]:
# 对数据进行预处理
X_num = X_alibi[:, -len(numerical_ids):].astype(np.float32, copy=False)
xmin, xmax = X_num.min(axis=0), X_num.max(axis=0)
rng = (-1., 1.)
X_num_scaled = (X_num - xmin) / (xmax - xmin) * (rng[1] - rng[0]) + rng[0]

X_cat = X_alibi[:, :-len(numerical_ids)].copy()
ohe = OneHotEncoder(categories='auto', sparse_output=False).fit(X_cat)
X_cat_ohe = ohe.transform(X_cat)

# 对特征位置进行重新排序，使得文本特征在前，数值特征在后
X_alibi = np.c_[X_cat_ohe, X_num_scaled].astype(np.float32, copy=False)
X_train_alibi, X_test_alibi = X_alibi[:idx, :], X_alibi[idx:, :]
print(X_train_alibi.shape, X_test_alibi.shape)

(800, 61) (200, 61)


In [28]:
# define data preprocessor
num_transf = StandardScaler()
cat_transf = OneHotEncoder(
    categories=[range(len(x)) for x in category_map.values()],
    handle_unknown='ignore'
)
preprocessor_alibi = ColumnTransformer(
    transformers=[
        ('cat', cat_transf, categorical_ids),
        ('num', num_transf, numerical_ids)
    ],
    sparse_threshold=0
)

# fit data preprocessor
preprocessor_alibi = preprocessor_alibi.fit(features)

In [29]:
# 训练模型
clf_ablit = RandomForestClassifier(n_estimators=100, random_state=42)
clf_ablit.fit(X_train_alibi, y_train_alibi)

In [30]:
def predict_fn(x):
    pred_prob = clf_ablit.predict_proba(x)
    return np.hstack([1 - pred_prob[:,1].reshape(-1, 1), pred_prob[:,1].reshape(-1, 1)])

In [31]:
# Initialize the explainer object
X_alibi = X_test_alibi[0].reshape((1,) + X_test_alibi[0].shape)

shape = X_alibi.shape
beta = .01
c_init = 1.
c_steps = 5
max_iterations = 500
rng = (-1., 1.)  # scale features between -1 and 1
rng_shape = (1,) + features.shape[1:]
feature_range = ((np.ones(rng_shape) * rng[0]).astype(np.float32), 
                 (np.ones(rng_shape) * rng[1]).astype(np.float32))

In [32]:
cf = CounterfactualProto(predict_fn,
                         shape,
                         beta=beta,
                         cat_vars=cat_vars_ohe,
                         ohe=True,  # OHE flag
                         max_iterations=max_iterations,
                         feature_range=feature_range,
                         c_init=c_init,
                         c_steps=c_steps
                        )

cf.fit(X_train_alibi, d_type='abdm', disc_perc=[25, 50, 75])



CounterfactualProto(meta={
  'name': 'CounterfactualProto',
  'type': ['blackbox', 'tensorflow', 'keras'],
  'explanations': ['local'],
  'params': {
              'kappa': 0.0,
              'beta': 0.01,
              'gamma': 0.0,
              'theta': 0.0,
              'cat_vars': {
                            0: 4,
                            4: 5,
                            9: 10,
                            19: 5,
                            24: 5,
                            29: 4,
                            33: 3,
                            36: 4,
                            40: 3,
                            43: 3,
                            46: 4,
                            50: 2,
                            52: 2}
                          ,
              'ohe': True,
              'use_kdtree': False,
              'learning_rate_init': 0.01,
              'max_iterations': 500,
              'c_init': 1.0,
              'c_steps': 5,
              'eps': (0.001, 0.

In [33]:
def describe_instance(X, explanation, target_names, eps=1e-2):
    print('Original instance: {}  -- proba: {}'.format(target_names[explanation.orig_class],
                                                       explanation.orig_proba[0]))
    print('Counterfactual instance: {}  -- proba: {}'.format(target_names[explanation.cf['class']],
                                                             explanation.cf['proba'][0]))
    print('\nCounterfactual perturbations...')
    print('\nCategorical:')
    X_orig_ord = ohe_to_ord(X, cat_vars_ohe)[0]
    X_cf_ord = ohe_to_ord(explanation.cf['X'], cat_vars_ohe)[0]
    delta_cat = {}
    for i, (_, v) in enumerate(category_map.items()):
        cat_orig = v[int(X_orig_ord[0, i])]
        cat_cf = v[int(X_cf_ord[0, i])]
        if cat_orig != cat_cf:
            delta_cat[feature_names_alibi[i]] = [cat_orig, cat_cf]
    if delta_cat:
        for k, v in delta_cat.items():
            print('{}: {}  -->   {}'.format(k, v[0], v[1]))
    print('\nNumerical:')
    delta_num = X_cf_ord[0, -4:] - X_orig_ord[0, -4:]
    n_keys = len(list(cat_vars_ord.keys()))
    for i in range(delta_num.shape[0]):
        if np.abs(delta_num[i]) > eps:
            print('{}: {:.2f}  -->   {:.2f}'.format(feature_names_alibi[i+n_keys],
                                            X_orig_ord[0,i+n_keys],
                                            X_cf_ord[0,i+n_keys]))
            

def calculate_proximity_pro(X_orig_ord, X_cf_ord, explanation, df):
    counterfactuals_list = []
    
    query_instance_df_alibi = pd.DataFrame(X_orig_ord, columns=feature_names_alibi)
    final_cfs_alibi = pd.DataFrame(X_cf_ord, columns=feature_names_alibi)
    final_cfs_alibi[target_name] = 0 if explanation.cf['proba'][0][1] < 0.5 else 1
    
    counterfactuals_list.append((query_instance_df_alibi, final_cfs_alibi))
    
    metrics_alibi = metrics.calculate_metrics(
        counterfactuals_list, df, numerical_features_alibi, 
        categorical_features_alibi, preprocessor_alibi,"prototype",target_name
    )
    
    return metrics_alibi

In [34]:
pd_german= pd.DataFrame(features, columns=feature_names)
pd_german["Label"] = labels

In [35]:
y_pred_alibi=predict_fn(X_test_alibi).argmax(axis=1)
instances_alibi = X_test_alibi[y_pred_alibi == 1][:N_CF]

metrics_alibi = []
time_alibi = []
counterfactuals_list_alibi = []

for i in range(N_CF):
    
    instance = instances_alibi[i].reshape(1, -1)
    start_time = time.time()
    explanation = cf.explain(instance)
    end_take = time.time() - start_time
    time_alibi.append(end_take)
    if explanation.cf is not None:
        counterfactuals_list_alibi.append(explanation.cf['X'])
        X_orig_ord = ohe_to_ord(X_alibi, cat_vars_ohe)[0]
        X_cf_ord = ohe_to_ord(explanation.cf['X'], cat_vars_ohe)[0]
        metric= calculate_proximity_pro(X_orig_ord, X_cf_ord, explanation, pd_german)
        metrics_alibi.append(metric)
        describe_instance(X_alibi, explanation, target_names)
    else:
        counterfactuals_list_alibi.append(None)

2023-10-03 18:13:41.934565: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:353] MLIR V1 optimization pass is not enabled


Original instance: Bad  -- proba: [0.47 0.53]
Counterfactual instance: Good  -- proba: [0.59 0.41]

Counterfactual perturbations...

Categorical:
ExistingChecking: 0-200 DM  -->   <0 DM
EmploymentSince: >=7yrs  -->   <1 year
PersonalStatusSex: male:single  -->   fem:div/mar
Property: unknown  -->   realest
Housing: free  -->   rent
Job: skilled  -->   unskilled
Telephone: none  -->   yes

Numerical:
CreditAmount: -0.69  -->   -0.67




Original instance: Bad  -- proba: [0.44 0.56]
Counterfactual instance: Good  -- proba: [0.51 0.49]

Counterfactual perturbations...

Categorical:
ExistingChecking: 0-200 DM  -->   <0 DM
CreditHistory: paid till  -->   all paid
Purpose: furniture  -->   business
EmploymentSince: >=7yrs  -->   1-3yrs
Property: unknown  -->   life ins
Housing: free  -->   rent
Telephone: none  -->   yes

Numerical:
Duration: -0.41  -->   -0.41
CreditAmount: -0.69  -->   -0.68


No counterfactual found!


Original instance: Bad  -- proba: [0.45 0.55]
Counterfactual instance: Good  -- proba: [0.52 0.48]

Counterfactual perturbations...

Categorical:
ExistingChecking: 0-200 DM  -->   <0 DM
Purpose: furniture  -->   radio/tv
EmploymentSince: >=7yrs  -->   1-3yrs
Property: unknown  -->   car
Housing: free  -->   own
Job: skilled  -->   mgmt/self

Numerical:
Duration: -0.41  -->   -0.32
CreditAmount: -0.69  -->   -0.65




Original instance: Bad  -- proba: [0.42 0.58]
Counterfactual instance: Good  -- proba: [0.54 0.46]

Counterfactual perturbations...

Categorical:
ExistingChecking: 0-200 DM  -->   <0 DM
Purpose: furniture  -->   radio/tv
SavingsAccount: 100-500DM  -->   unknown
PersonalStatusSex: male:single  -->   fem:div/mar
Property: unknown  -->   car
OtherInstallmentPlans: none  -->   bank
Housing: free  -->   own

Numerical:
Duration: -0.41  -->   -0.41
CreditAmount: -0.69  -->   -0.93




Original instance: Bad  -- proba: [0.49 0.51]
Counterfactual instance: Good  -- proba: [0.54 0.46]

Counterfactual perturbations...

Categorical:
CreditHistory: paid till  -->   critical
Purpose: furniture  -->   others
EmploymentSince: >=7yrs  -->   1-3yrs
OtherDebtors: none  -->   co-app
Property: unknown  -->   car
Housing: free  -->   own
Job: skilled  -->   mgmt/self
Telephone: none  -->   yes

Numerical:
Duration: -0.41  -->   -0.41
CreditAmount: -0.69  -->   0.29
InstallmentRatePercentage: 1.00  -->   -0.33
PresentResidenceSince: 1.00  -->   0.33




Original instance: Bad  -- proba: [0.48 0.52]
Counterfactual instance: Good  -- proba: [0.57 0.43]

Counterfactual perturbations...

Categorical:
CreditHistory: paid till  -->   all paid
EmploymentSince: >=7yrs  -->   1-3yrs
PersonalStatusSex: male:single  -->   male:mar
Property: unknown  -->   life ins
Housing: free  -->   rent

Numerical:
Duration: -0.41  -->   -0.68
CreditAmount: -0.69  -->   -0.89




Original instance: Bad  -- proba: [0.35 0.65]
Counterfactual instance: Good  -- proba: [0.53 0.47]

Counterfactual perturbations...

Categorical:
ExistingChecking: 0-200 DM  -->   <0 DM
EmploymentSince: >=7yrs  -->   1-3yrs
PersonalStatusSex: male:single  -->   male:div
Property: unknown  -->   realest
Housing: free  -->   rent
Job: skilled  -->   unskilled

Numerical:
Duration: -0.41  -->   -0.41
CreditAmount: -0.69  -->   -0.70




Original instance: Bad  -- proba: [0.36 0.64]
Counterfactual instance: Good  -- proba: [0.54 0.46]

Counterfactual perturbations...

Categorical:
CreditHistory: paid till  -->   no credits
Purpose: furniture  -->   radio/tv
SavingsAccount: 100-500DM  -->   unknown
EmploymentSince: >=7yrs  -->   <1 year
PersonalStatusSex: male:single  -->   fem:div/mar
Property: unknown  -->   life ins
Housing: free  -->   rent

Numerical:
Duration: -0.41  -->   -0.76
CreditAmount: -0.69  -->   -0.70
InstallmentRatePercentage: 1.00  -->   1.00




Original instance: Bad  -- proba: [0.42 0.58]
Counterfactual instance: Good  -- proba: [0.53 0.47]

Counterfactual perturbations...

Categorical:
SavingsAccount: 100-500DM  -->   unknown
EmploymentSince: >=7yrs  -->   <1 year
OtherDebtors: none  -->   co-app
Housing: free  -->   rent
Job: skilled  -->   mgmt/self
Telephone: none  -->   yes

Numerical:
Duration: -0.41  -->   -0.06
CreditAmount: -0.69  -->   -0.03


No counterfactual found!


Original instance: Bad  -- proba: [0.41 0.59]
Counterfactual instance: Good  -- proba: [0.52 0.48]

Counterfactual perturbations...

Categorical:
SavingsAccount: 100-500DM  -->   <100 DM
Property: unknown  -->   car
OtherInstallmentPlans: none  -->   bank
Housing: free  -->   own
Job: skilled  -->   unskilled

Numerical:
CreditAmount: -0.69  -->   -0.93
PresentResidenceSince: 1.00  -->   1.00




Original instance: Bad  -- proba: [0.3 0.7]
Counterfactual instance: Good  -- proba: [0.5 0.5]

Counterfactual perturbations...

Categorical:
ExistingChecking: 0-200 DM  -->   <0 DM
CreditHistory: paid till  -->   all paid
Purpose: furniture  -->   retrain
SavingsAccount: 100-500DM  -->   unknown
EmploymentSince: >=7yrs  -->   4-6yrs
Job: skilled  -->   unskilled

Numerical:
CreditAmount: -0.69  -->   -0.87
InstallmentRatePercentage: 1.00  -->   1.00
PresentResidenceSince: 1.00  -->   1.00




Original instance: Bad  -- proba: [0.4 0.6]
Counterfactual instance: Good  -- proba: [0.55 0.45]

Counterfactual perturbations...

Categorical:
ExistingChecking: 0-200 DM  -->   <0 DM
Purpose: furniture  -->   radio/tv
SavingsAccount: 100-500DM  -->   unknown
EmploymentSince: >=7yrs  -->   <1 year
Property: unknown  -->   car
Housing: free  -->   own

Numerical:
Duration: -0.41  -->   0.12
CreditAmount: -0.69  -->   -0.59




Original instance: Bad  -- proba: [0.49 0.51]
Counterfactual instance: Good  -- proba: [0.55 0.45]

Counterfactual perturbations...

Categorical:
CreditHistory: paid till  -->   late pay
PersonalStatusSex: male:single  -->   male:div
Housing: free  -->   own
Job: skilled  -->   mgmt/self
Telephone: none  -->   yes

Numerical:
Duration: -0.41  -->   -0.59
CreditAmount: -0.69  -->   -0.55


No counterfactual found!


Original instance: Bad  -- proba: [0.38 0.62]
Counterfactual instance: Good  -- proba: [0.51 0.49]

Counterfactual perturbations...

Categorical:
SavingsAccount: 100-500DM  -->   500-1kDM
EmploymentSince: >=7yrs  -->   1-3yrs
PersonalStatusSex: male:single  -->   fem:div/mar
OtherDebtors: none  -->   co-app
Property: unknown  -->   car
Housing: free  -->   rent

Numerical:
CreditAmount: -0.69  -->   -0.65




Original instance: Bad  -- proba: [0.42 0.58]
Counterfactual instance: Good  -- proba: [0.5 0.5]

Counterfactual perturbations...

Categorical:
ExistingChecking: 0-200 DM  -->   <0 DM
CreditHistory: paid till  -->   all paid
Purpose: furniture  -->   radio/tv
EmploymentSince: >=7yrs  -->   <1 year
OtherDebtors: none  -->   co-app
OtherInstallmentPlans: none  -->   bank
Job: skilled  -->   mgmt/self
Telephone: none  -->   yes

Numerical:
CreditAmount: -0.69  -->   -0.81




Original instance: Bad  -- proba: [0.39 0.61]
Counterfactual instance: Good  -- proba: [0.55 0.45]

Counterfactual perturbations...

Categorical:
ExistingChecking: 0-200 DM  -->   <0 DM
CreditHistory: paid till  -->   all paid
Purpose: furniture  -->   education
SavingsAccount: 100-500DM  -->   unknown
PersonalStatusSex: male:single  -->   fem:div/mar

Numerical:
CreditAmount: -0.69  -->   -0.90
Original instance: Bad  -- proba: [0.47 0.53]
Counterfactual instance: Good  -- proba: [0.64 0.36]

Counterfactual perturbations...

Categorical:
ExistingChecking: 0-200 DM  -->   <0 DM
CreditHistory: paid till  -->   all paid
EmploymentSince: >=7yrs  -->   4-6yrs
Property: unknown  -->   car
OtherInstallmentPlans: none  -->   bank
Housing: free  -->   own

Numerical:
CreditAmount: -0.69  -->   -0.64




In [37]:
# calculate the average of the proximity metrics
pro_avg_proximity_cont = np.mean([x['avg_proximity_cont'] for x in metrics_alibi])
pro_avg_proximity_cat = np.mean([x['avg_proximity_cat'] for x in metrics_alibi])
pro_avg_sparsity= np.mean([x['avg_proximity_cat'] for x in metrics_alibi])


print(f"Average proximity for continuous features: {pro_avg_proximity_cont}")
print(f"Average proximity for categorical features: {pro_avg_proximity_cat}")
print(f"Average sparsity: {pro_avg_sparsity}")

Average proximity for continuous features: 0.1764705882352941
Average proximity for categorical features: 0.18409586056644878
Average sparsity: 0.18409586056644878


In [36]:
# validate and time
pro_avg_time = np.mean(time_alibi)

invalid_count = [cf for cf in counterfactuals_list_alibi if cf is None]
pro_avg_validity = 1 - len(invalid_count) / N_CF
print("Average Time Taken per instance:", pro_avg_time)
print("Average Validity:", pro_avg_validity)

Average Time Taken per instance: 104.07225027084351
Average Validity: 0.85


# Results

In [38]:
result_dict = {
    "Dice": {
        "proximity_cont": dice_avg_proximity_cont,
        "proximity_cat": dice_avg_proximity_cat,
        "sparsity": dice_avg_sparsity,
        "time(s)": dice_avg_time,
        "validity": dice_avg_validity
    },
    "Nice": {
        "proximity_cont": nice_avg_proximity_cont,
        "proximity_cat": nice_avg_proximity_cat,
        "sparsity": nice_avg_sparsity,
        "time(s)": nice_avg_time,
        "validity": nice_avg_validity
    },
    "Prototype": {
        "proximity_cont": pro_avg_proximity_cont,
        "proximity_cat": pro_avg_proximity_cat,
        "sparsity": pro_avg_sparsity,
        "time(s)": pro_avg_time,
        "validity": pro_avg_validity
    }
}

result = pd.DataFrame(result_dict).T.round(3)
result

Unnamed: 0,proximity_cont,proximity_cat,sparsity,time(s),validity
Dice,0.0,0.054,2.9,0.466,1.0
Nice,0.0,0.035,1.9,0.099,0.8
Prototype,0.176,0.184,0.184,104.072,0.85
