## Tree Explainer

In [1]:
%load_ext autoreload
%autoreload 2

In [26]:
import matplotlib.pyplot as plt
from sklearn import datasets
import mpl_toolkits.mplot3d 
from sklearn.cluster import KMeans
from sklearn import metrics
import numpy as np
import pandas as pd
from cxplain.xkm import Xkm
from cxplain.tree import  DecisionTreeExplainer, RandomForestExplainer, ExKMCExplainer

In [3]:
iris = datasets.load_iris()
X = iris.data
y = iris.target

In [4]:
data = np.array([[1, 2, 1, 2],
               [2, 3, 2, 3],
               [2, 2, 1, 2]])

In [5]:
kmeans = KMeans(n_clusters=2, random_state=3).fit(data)

In [6]:
kmeans.predict(data)

array([0, 1, 0])

In [27]:
kmeans = KMeans(n_clusters=3, random_state=3).fit(X)
cluster_centers = kmeans.cluster_centers_

In [8]:
predictions = kmeans.predict(X)
predictions

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2,
       2, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2, 0, 2, 0, 2, 2, 0, 0, 2, 2, 2, 2,
       2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 0])

In [9]:
tree_explainer = DecisionTreeExplainer(data= X, cluster_predictions=predictions)

In [10]:
tree_explaind = tree_explainer.fit_explain()

R1    0.016162
R2    0.000000
R3    0.297972
R4    0.339733
Name: 0, dtype: float64


In [11]:
tree_explaind.global_relevance

R1    0.016162
R2    0.000000
R3    0.297972
R4    0.339733
Name: 0, dtype: float64

In [20]:
forest_explainer = RandomForestExplainer(data= X, cluster_predictions=predictions)

In [21]:
forest_explained = forest_explainer.fit_explain()

In [22]:
forest_explained.global_relevance

R1    0.156497
R2    0.020770
R3    0.527698
R4    0.295035
Name: 0, dtype: float64

In [23]:
pd.DataFrame([[0,1,2], [3,4,5]])

Unnamed: 0,0,1,2
0,0,1,2
1,3,4,5


In [30]:
from sklearn.datasets import make_blobs

# Create dataset
n = 100
d = 10
k = 3
X, _ = make_blobs(n, d, k, cluster_std=3.0)

TypeError: make_blobs() takes from 0 to 2 positional arguments but 3 positional arguments (and 1 keyword-only argument) were given

In [52]:
exkmc = ExKMCExplainer(X, kmeans, k=3, max_leaves=6, base_tree='NONE')

In [53]:
exp = exkmc.fit_explain()

In [54]:
exp.global_relevance

R1    2.0
R2    1.0
R3    2.0
R4    0.0
Name: 0, dtype: float64