<a href="https://colab.research.google.com/github/Krish2208/explanability/blob/main/implementation_pyciu.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install py-ciu

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import pandas as pd
import numpy as np
import sklearn
import sklearn.cluster
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import random
from ciu import determine_ciu

In [3]:
df_train =  pd.read_csv('/content/data/train.csv')
df_test = pd.read_csv('/content/data/test.csv')

df_train.income = df_train.income.map({'<=50K':0, '>50K':1})
df_test.income = df_test.income.map({'<=50K':0, '>50K':1})

In [4]:
le = sklearn.preprocessing.LabelEncoder()
for col in df_train.columns:
    if df_train[col].dtype == 'object':
        le.fit(df_train[col])
        df_train[col] = le.transform(df_train[col])
        df_test[col] = le.transform(df_test[col])

In [5]:
random_state = 39
exp_iter = 10
random.seed(random_state)

#Get datasets
X_train = df_train.drop('income', axis=1)
y_train = df_train.income
X_test = df_test.drop('income', axis=1)
y_test = df_test.income
test_x = X_test.values
n_classes = len(np.unique(y_train))
feat_list = [each.replace(' ','_') for each in X_train.columns]
X = np.vstack((X_train.values, test_x))

In [6]:
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(n_estimators=100)
model.fit(X_train, y_train)

In [7]:
income_ciu = determine_ciu(
    X_test.iloc[10:11],
    model.predict_proba,
    X_train.to_dict('list'),
    samples = 1000,
    prediction_index = 1
)

In [8]:
type(income_ciu.ci)

dict

In [9]:
def exp_fn_blk(xtest):
    exp1 = []
    for i in range(len(xtest)):
        exp = determine_ciu(X_test.iloc[i:i+1], model.predict_proba, X_train.to_dict('list'), samples = 1000, prediction_index = 1)
        exp_list = [[feat_list.index(i), exp.ci[i]] for i in exp.ci]
        exp1.append(exp_list)
    return np.array(exp1)

In [10]:
exp1 = exp_fn_blk(X_test[:100])
exp2 = exp_fn_blk(X_test[:100])

In [11]:
def calc_identity(exp1, exp2):
    dis = np.array([np.array_equal(exp1[i],exp2[i]) for i in range(len(exp1))])
    total = dis.shape[0]
    true = np.sum(dis)
    score = (total-true)/total
    return score*100, true, total

def calc_separability(exp):
    wrong = 0
    for i in range(exp.shape[0]):
        for j in range(exp.shape[0]):
            if i == j:
                continue
            eq = np.array_equal(exp[i],exp[j])
            if eq:
                wrong = wrong + 1
    total = exp.shape[0]
    score = 100*abs(wrong)/total**2
    return wrong,total,total**2,score

def calc_stability(exp, labels):
    total = labels.shape[0]
    label_values = np.unique(labels)
    n_clusters = label_values.shape[0]
    init = np.array([[np.average(exp[np.where(labels == i)], axis = 0)] for i in label_values]).squeeze()
    ct = sklearn.cluster.KMeans(n_clusters = n_clusters, random_state=1, n_init=10, init = init)
    ct.fit(exp)
    error = np.sum(np.abs(labels-ct.labels_))
    if error/total > 0.5:
        error = total-error
    return error, total

In [12]:
i = calc_identity(exp1,exp2)
print(i)

s = calc_separability(test_x[:100])
print(s)

def enc_exp(exp, feature_num):
    enc_exp = np.zeros((len(exp),feature_num))
    for i in range(len(exp)):
        for j in range(len(exp[i])):
            enc_exp[i][int(exp[i,j,0])] = exp[i,j,1]
    return enc_exp

enc1 = enc_exp(exp1, len(feat_list))
sb = calc_stability(enc1, y_test[:100])
print(sb)

(49.0, 51, 100)
(0, 100, 10000, 0.0)
(32, 100)


In [13]:
import numpy as np
import scipy
from sklearn.metrics import log_loss

class FeatureAttribution:
    def __init__(self, model, inst, y, sorted_atr):
        self.model = model
        self.inst = inst
        self.y = y
        self.sorted_atr = sorted_atr
        self.losses = []
        self.atr_values = []

    def monotonicity(self):
        losses = []
        atr_values = []
        for i in range(len(self.sorted_atr)):
            atr = self.sorted_atr[i]
            new_inst = np.copy(self.inst)
            np.put(new_inst, i, -1)
            loss = log_loss(self.y, self.model.predict_proba(new_inst.reshape(1, -1))[0])
            losses.append(loss)
            atr_values.append(abs(atr))
        self.losses = losses
        self.atr_values = atr_values
        monotonicity = scipy.stats.spearmanr(losses, atr_values).correlation
        return monotonicity

    def non_sensitivity(self):
        loss_zeros = set([i for i in range(len(self.losses)) if self.losses[i] == 0])
        atr_zeros = set([i for i in range(len(self.atr_values)) if self.atr_values[i] == 0])
        non_sensitivity = len(loss_zeros.symmetric_difference(atr_zeros))
        return non_sensitivity
    
    def effective_complexity(self, sorted_feat, threshold):
        min_k = 0
        threshold = 0.1
        for i in range(len(sorted_feat)):
            new_inst = np.copy(self.inst)
            for j in range(i+1, len(sorted_feat)):
                np.put(new_inst, sorted_feat[j], -1)
            loss = log_loss(self.y, self.model.predict_proba(new_inst.reshape(1, -1))[0])
            if loss < threshold:
                min_k = i+1
        return min_k

In [14]:
list_monotonicity = []
list_non_sensitivity = []
list_effective_complexity = []

for i in range(len(test_x[:100])):
    atr = np.array(sorted(exp1[i], key=lambda x: x[1], reverse=True))
    sorted_atr = [j for i,j in atr]
    sorted_feat = [i for i,j in atr]
    y = np.zeros(n_classes, dtype=int)
    np.put(y, y_test[i], 1)
    example = FeatureAttribution(model, test_x[i], y, sorted_atr)
    list_monotonicity.append(example.monotonicity())
    list_non_sensitivity.append(example.non_sensitivity())
    list_effective_complexity.append(example.effective_complexity(sorted_feat, 0.1))

In [15]:
print(np.mean(list_monotonicity))
print(np.mean(list_non_sensitivity))
print(np.mean(list_effective_complexity))

print(np.median(list_monotonicity))
print(np.median(list_non_sensitivity))
print(np.median(list_effective_complexity))

0.017875652004764493
0.93
10.31
0.048227615025518505
0.0
14.0
