In [1]:
!pip install LIME

Looking in indexes: https://pypi.org/simple, https://www.piwheels.org/simple
Collecting LIME
  Downloading https://archive1.piwheels.org/simple/lime/lime-0.2.0.1-py3-none-any.whl (283 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m283.8/283.8 kB[0m [31m455.2 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting tqdm
  Downloading https://www.piwheels.org/simple/tqdm/tqdm-4.67.1-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.5/78.5 kB[0m [31m182.1 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Installing collected packages: tqdm, LIME
Successfully installed LIME-0.2.0.1 tqdm-4.67.1


In [2]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

In [3]:
from lime.lime_tabular import LimeTabularExplainer

In [4]:
# Load the scikit-learn breast cancer dataset; later cells read its
# .data, .target and .feature_names attributes.
data = load_breast_cancer()

In [5]:
# Target vector as returned by the loader, and the feature matrix wrapped in a
# DataFrame so that every column carries its feature name.
y = data.target
X = pd.DataFrame(data=data.data, columns=data.feature_names)

In [6]:
# Inspect the available feature names before selecting a subset of them.
data.feature_names

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error',
       'fractal dimension error', 'worst radius', 'worst texture',
       'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension'], dtype='<U23')

In [8]:
# Restrict the feature matrix to five of the "mean" measurements so that the
# model and its explanation stay small.
cols = [
    'mean radius',
    'mean texture',
    'mean perimeter',
    'mean area',
    'mean smoothness',
]
X = X.loc[:, cols]

In [9]:
# Hold out 25% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [10]:
# Random forest with 200 trees and a fixed seed, fitted on the training split.
# The bare fit(...) expression is kept last so the cell displays the estimator.
clf = RandomForestClassifier(
    n_estimators=200,
    random_state=42,
)
clf.fit(X_train, y_train)

0,1,2
,n_estimators,200
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [12]:
# Choose an instance: the test-set row at this position is the one LIME explains.
inst_idx = 3
instance_row = X_test.iloc[inst_idx]
x = instance_row.values

In [13]:
# Build the LIME explainer from the training data. LIME explains a prediction by
# sampling perturbed points, so random_state is fixed here; without it the
# reported feature weights change from run to run.
# NOTE(review): class_names is assumed to follow the label encoding
# (0 -> malignant, 1 -> benign) -- confirm against data.target_names.
explainer = LimeTabularExplainer(
    training_data=X_train.values,
    feature_names=cols,
    class_names=['malignant', 'benign'],
    mode='classification',
    discretize_continuous=True,
    random_state=42,
)

In [14]:
def predict_proba_named(samples):
    """Return class probabilities for a 2-D array of samples.

    The forest was fitted on a DataFrame, so the array is wrapped in a DataFrame
    with the training column names before prediction. Passing the bare array
    makes scikit-learn warn that X has no valid feature names.
    """
    return clf.predict_proba(pd.DataFrame(samples, columns=cols))


# Explain the chosen instance using all five selected features.
exp = explainer.explain_instance(x, predict_proba_named, num_features=5)



In [15]:
# Score the instance through a one-row DataFrame that carries the training column
# names; the forest was fitted on a DataFrame and warns about missing feature
# names when handed a bare list or array.
x_named = pd.DataFrame([x], columns=cols)
print("Predicted probabilities: ", clf.predict_proba(x_named)[0])
print("\nLIME explanation (feature, weight): ")
# as_list() yields (feature condition, weight) pairs for the explained instance.
for feat, weight in exp.as_list():
    print(feat, weight)

Predicted probabilities:  [0.025 0.975]

LIME explanation (feature, weight): 
mean smoothness > 0.10 -0.17410760044165377
75.50 < mean perimeter <= 85.94 0.14015631634599623
421.65 < mean area <= 545.65 0.08028826274454114
11.72 < mean radius <= 13.27 0.04127126304284745
16.18 < mean texture <= 18.69 0.035486335157979707


