In [1]:
import numpy as np
import pandas as pd

from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, f1_score

from mountaineer import Mountaineer
from gale import create_mapper

  from .autonotebook import tqdm as notebook_tqdm


## Explanation functions

In [2]:
from lime.lime_tabular import LimeTabularExplainer

def run_lime(X, model, num_features=4, num_samples=100):
    """Compute a LIME explanation vector for every row of ``X``.

    A single ``LimeTabularExplainer`` is built from ``X`` (continuous
    features are not discretized, seed fixed at 2020) and then queried
    once per row.

    Args:
        X: 2-D array; each row is one instance to explain. It is also the
            data the explainer is constructed from.
        model: object exposing ``predict_proba``; that bound method is
            passed to LIME as the prediction function.
        num_features: forwarded to ``explain_instance``.
        num_samples: forwarded to ``explain_instance``.

    Returns:
        ``np.ndarray`` with one row per row of ``X`` and ``X.shape[1]``
        columns. Column ``j`` holds the weight LIME reported for feature
        ``j``; features LIME did not report stay at 0.
    """
    explainer = LimeTabularExplainer(X, discretize_continuous=False, random_state=2020)

    def _weights_for(row):
        explanation = explainer.explain_instance(
            row,
            model.predict_proba,
            num_features=num_features,
            num_samples=num_samples,
        )
        weights = [0] * X.shape[1]
        # Each entry is (feature name, weight). The name is parsed as an
        # integer column index -- presumably because no feature_names were
        # given to the explainer; confirm against the lime docs.
        for feature_name, weight in explanation.as_list():
            weights[int(feature_name)] = weight
        return weights

    return np.array([_weights_for(row) for row in X])

## Generate dataset

In [3]:
# Training set: four Gaussian blobs (identity covariance, 250 points each),
# one per quadrant. Diagonally opposite blobs share a label, so the two
# classes form an XOR-like layout.
np.random.seed(2023)

train_centers = ([-1.5,-1.5], [-1.5,1.5], [1.5,1.5], [1.5,-1.5])
train_labels = (1, 0, 1, 0)

# The blobs are drawn in the listed order so the random stream is consumed
# exactly as four consecutive multivariate_normal calls.
X1, X2, X3, X4 = [
    np.random.multivariate_normal(center, [[1,0],[0,1]], size=250)
    for center in train_centers
]
y1, y2, y3, y4 = [np.repeat(label, 250) for label in train_labels]

X_train = np.concatenate((X1, X2, X3, X4))
y_train = np.concatenate((y1, y2, y3, y4))

In [4]:
# Test set: same four-blob layout as the training data, drawn with a
# different seed (2024) so the points are independent samples.
np.random.seed(2024)

# Each entry pairs the blob's features with its constant label vector.
# Blobs are drawn in the listed order, one multivariate_normal call each.
test_blobs = [
    (
        np.random.multivariate_normal(center, [[1,0],[0,1]], size=250),
        np.repeat(label, 250),
    )
    for center, label in (
        ([-1.5,-1.5], 1),
        ([-1.5,1.5], 0),
        ([1.5,1.5], 1),
        ([1.5,-1.5], 0),
    )
]
(X1, y1), (X2, y2), (X3, y3), (X4, y4) = test_blobs

X_test = np.concatenate([features for features, _ in test_blobs])
y_test = np.concatenate([labels for _, labels in test_blobs])

## Train model and get explanations

In [5]:
# Fit a two-hidden-layer MLP on the training blobs (seeded for repeatability).
mlp = MLPClassifier(
    hidden_layer_sizes=(64, 64),
    batch_size=16,
    random_state=2020,
    max_iter=1000,
).fit(X_train, y_train)

# Keep only column 1 of the predicted probabilities (with 0/1 labels this
# should be the probability of class 1 -- scikit-learn orders columns by
# `classes_`).
class_probabilities = mlp.predict_proba(X_test)
predictions = class_probabilities[:, 1]
# Column-vector form of the same probabilities, passed to create_mapper later.
function_mlp = predictions.reshape(-1, 1)

# One LIME weight vector per test point.
exp_lime_mlp = run_lime(X_test, mlp)

In [6]:
# Predict the test labels once and score them with both metrics.
test_predictions = mlp.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, test_predictions)}")
print(f"F1: {f1_score(y_test, test_predictions)}")

Accuracy: 0.834
F1: 0.83852140077821


## Creating the mapper outputs

In [7]:
# Every (resolution, gain, distance-threshold) combination, with resolution
# varying slowest and distance threshold fastest.
mapper_param_grid = [
    (res, gain, dist)
    for res in [5,10,20]
    for gain in [0.2,0.4]
    for dist in [0.2,0.3,0.4]
]

# One label, one explanation matrix and one mapper output per combination;
# the three lists stay index-aligned.
expl_labels = [f"R{res}-G{gain}-D{dist}" for res, gain, dist in mapper_param_grid]
# Every configuration is built from the same LIME explanations.
explanation_vectors = [exp_lime_mlp for _ in mapper_param_grid]
mapper_outputs = [
    create_mapper(exp_lime_mlp, function_mlp, resolution=res, gain=gain, dist_thresh=dist)
    for res, gain, dist in mapper_param_grid
]

## Visualize

In [8]:
from IPython.display import HTML, display

# Inject a CSS rule that forces the notebook's `.container` element to be
# 1920px wide.
wide_container_css = "<style>.container { width:1920px !important; }</style>"
display(HTML(wide_container_css))

In [9]:
# Convert each explanation matrix to nested Python lists.
explanation_list = [expl_matrix.tolist() for expl_matrix in explanation_vectors]

# Squeeze each prediction entry and collect the results into a new array.
predicted_prob = np.array(list(map(np.squeeze, predictions)))

# Column names of the dataframe
column_names = pd.Series(["Feat1", "Feat2"])

In [12]:
# Visualize: pass the test data, labels, predicted probabilities, the
# per-configuration explanations and mapper outputs, the feature names and
# the configuration labels to Mountaineer.
mnt = Mountaineer()
mnt.visualize(
    X_test,
    y_test,
    predicted_prob,
    explanation_list,
    mapper_outputs,
    column_names,
    expl_labels,
)

