In [1]:
import pandas as pd
import numpy as np
import sklearn
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import lime
import lime.lime_tabular
import random

In [2]:
df_train =  pd.read_csv('./archive/train.csv')
df_test = pd.read_csv('./archive/test.csv')

df_train.income = df_train.income.map({'<=50K':0, '>50K':1})
df_test.income = df_test.income.map({'<=50K':0, '>50K':1})

In [3]:
le = sklearn.preprocessing.LabelEncoder()
for col in df_train.columns:
    if df_train[col].dtype == 'object':
        le.fit(df_train[col])
        df_train[col] = le.transform(df_train[col])
        df_test[col] = le.transform(df_test[col])

In [4]:
random_state = 39
exp_iter = 10
random.seed(random_state)

#Get datasets
X_train = df_train.drop('income', axis=1)
y_train = df_train.income
X_test = df_test.drop('income', axis=1)
y_test = df_test.income
test_x = X_test.values
n_classes = len(np.unique(y_train))
feat_list = [each.replace(' ','_') for each in X_train.columns]
X = np.vstack((X_train.values, test_x))

In [5]:
class_names = ['<=50K', '>50K']
lime_explainer = lime.lime_tabular.LimeTabularExplainer(X_train.values, feature_names = feat_list, class_names=class_names, discretize_continuous=True)

In [6]:
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(n_estimators=100)
model.fit(X_train, y_train)

RandomForestClassifier()

In [7]:
from feat_atr import FeatureAttribution

In [8]:
ex = lime_explainer.explain_instance(test_x[10], model.predict_proba, num_features=len(feat_list))
atr = sorted(ex.as_map()[1])
sorted_atr = [j for i,j in atr]
y = np.zeros(n_classes, dtype=int)
np.put(y, y_test[10], 1)
example = FeatureAttribution(model, test_x[10], y, sorted_atr)

In [9]:
example.monotonicity()

0.3682489824031308

In [10]:
example.non_sensitivity()

1

In [11]:
sorted_feat = [i for i,j in ex.as_map()[1]]
example.effective_complexity(sorted_feat, 0.1)

14