<a href="https://colab.research.google.com/github/Fgp910/exigo/blob/main/exigo_tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Exigo tutorial

In this tutorial, we will show how to use the `ActivationExplainer` class from the Exigo framework. For this example, we will train a rather shallow DNN on a simple binary classification dataset (circles by default, or moons).

In [None]:
# First, clone the Exigo repository. The clone is skipped when the `exigo`
# directory already exists, so re-running this cell does not end in a
# "destination path 'exigo' already exists" error from git.
!test -d exigo || git clone https://github.com/Fgp910/exigo.git
# Copy the module into the working directory under the name `exigo.py`
!cp exigo/exigo.py exigo.py

In [None]:
# Import the required libraries
import exigo
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_circles, make_moons
import tensorflow as tf
from tensorflow import keras
from scipy.spatial import distance as sp_distance

In [None]:
# Helper to visualise a 2-D dataset once it has been split into training and test sets
def plot_data_split(X_train, X_test, y_train, y_test):
    """Scatter-plot the training and test points on the current axes.

    Points are coloured by label with a red/blue colormap. Training points are
    drawn fully opaque, test points with alpha=0.6. The axes get an equal
    aspect ratio and integer-valued major ticks.

    Args:
        X_train: 2-D array of training samples; columns 0 and 1 are plotted.
        X_test: 2-D array of test samples; columns 0 and 1 are plotted.
        y_train: Labels used to colour the training points.
        y_test: Labels used to colour the test points.
    """
    palette = ListedColormap(["#FF0000", "#0000FF"])

    # Training split first (no extra options), then the test split (translucent)
    splits = (
        (X_train, y_train, {}),
        (X_test, y_test, {"alpha": 0.6}),
    )
    for points, labels, extra in splits:
        plt.scatter(
            points[:, 0], points[:, 1], c=labels, cmap=palette, edgecolors="k", **extra
        )

    axes = plt.gca()
    axes.set_aspect(1)
    for axis in (axes.xaxis, axes.yaxis):
        axis.set_major_locator(MaxNLocator(integer=True))

In [None]:
# Dataset generation
RANDOM_STATE = 42           # Fixed seed: same data and same split on every run
make_data = make_circles    # For moons, replace make_circles with make_moons
data_kwargs = {"n_samples": 500, "noise": 0.1, "random_state": RANDOM_STATE}
if make_data is make_circles:
    # `factor` is only accepted by make_circles; passing it to make_moons
    # would raise a TypeError, so it is added only for circles
    data_kwargs["factor"] = 0.4
data = make_data(**data_kwargs)
X, y = data
X = StandardScaler().fit_transform(X) # Standardizes the data

# Train/test split (40% of the samples go to the test set)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=RANDOM_STATE
)
plot_data_split(X_train, X_test, y_train, y_test)

In [None]:
# Architecture and other hyperparameters of our DNN model (for binary classification):
# 2 input features -> Dense(5, relu) -> Dense(10, relu) -> Dense(1, sigmoid)
layer_stack = [
    keras.Input(shape=(2,)),
    keras.layers.Dense(5, activation='relu'),
    keras.layers.Dense(10, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid'),
]
model = keras.Sequential(layer_stack)

model.summary()

# Adam optimizer, binary cross-entropy loss, and binary accuracy reported as "acc"
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    loss=keras.losses.BinaryCrossentropy(),
    metrics=[keras.metrics.BinaryAccuracy(name="acc")],
)

In [None]:
# Training the model; the test split is passed as validation data
batch_size = 16
val_dataset = (
    tf.data.Dataset
    .from_tensor_slices((X_test, y_test))
    .batch(batch_size)
)
model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=50,
    validation_data=val_dataset,
)

In [None]:
# Explainer configuration: build an ActivationExplainer from the model fitted
# above and the training samples (X_train)
explainer = exigo.ActivationExplainer(model, X_train)

In [None]:
# Settings for the explanations computed below: the index of the test sample
# to explain, the "distance" function to use, and the layer weights.
# Try changing these values
weights = [1, 2, 0]   # Last layer is excluded, for example
distance_function = sp_distance.euclidean
test_idx = 111

In [None]:
# The test sample being explained (same point for all three calls)
query_point = X_test[test_idx]

# Top 10 only
top10_indices, top10_similarities = explainer.explain(
    query_point, distance_function, weights=weights, top_k=10
)
print("Top:")
print(top10_indices, top10_similarities)

# Only the similarities above the given threshold
above_indices, above_similarities = explainer.explain(
    query_point, distance_function, weights=weights, threshold=0.997
)
print("Threshold:")
print(above_indices, above_similarities)

# No top_k or threshold: all points
indices, similarities = explainer.explain(
    query_point, distance_function, weights=weights
)
print("All:")
print(indices, similarities)

In [None]:
# Heatmap of the dataset points compared with the test_idx point
fig, ax = plt.subplots()
# The colormap is passed explicitly instead of calling plt.inferno(), which
# would change the default colormap for every later plot as a side effect.
heat = ax.scatter(
    X_train[indices, 0], X_train[indices, 1], c=similarities, cmap="inferno"
)
# The explained test point, drawn as a larger white marker on top
ax.scatter(X_test[test_idx, 0], X_test[test_idx, 1], c='w', edgecolor='k', s=100)
# Hand the similarity scatter to the colorbar explicitly. A bare plt.colorbar()
# uses pyplot's "current image", i.e. the last scatter drawn (the white marker,
# which carries no data array), so the bar would not reflect the similarities.
fig.colorbar(heat, ax=ax).ax.set_title("Similarity");