In [1]:
import numpy as np
import keras
import pandas as pd
from keras import datasets
import fexigo as fx
import gc
from scipy.spatial import distance as sp_distance

seed = 42
keras.utils.set_random_seed(seed)

def extract_cases(k, numElements, last_element, model, X_train, y_train, X_test, y_test, df, batch_size=32):
    """Append per-layer nearest-neighbour case statistics for a slice of the test set.

    The test elements ``last_element + 1 .. last_element + numElements`` are
    processed. For every layer index ``0 .. len(model.layers) - 2`` an
    ``fx.FastActivationExplainer`` is built (with ``n_layer=layer + 1``) and
    asked for the ``k`` closest training samples of each test element, using
    the Bray-Curtis distance. From the labels of those neighbours one row is
    recorded per (layer, element) pair.

    Parameters
    ----------
    k : int
        Number of neighbours requested from the explainer; also the divisor
        used to turn neighbour label counts into probabilities.
    numElements : int
        Number of test elements to process after ``last_element``.
    last_element : int
        Index of the last test element already present in ``df`` (``-1`` when
        nothing has been processed yet).
    model : object
        Model exposing ``layers`` and ``predict`` (a Keras model in this notebook).
    X_train, y_train : array-like
        Training samples and their one-hot labels (labels are read via ``np.argmax``).
    X_test, y_test : array-like
        Test samples and their one-hot labels.
    df : pandas.DataFrame
        Existing case database; it is not modified in place.
    batch_size : int, default 32
        Number of test elements passed to the explainer (and to
        ``model.predict``) at once.

    Returns
    -------
    pandas.DataFrame
        ``df`` followed by the new rows, with columns ``layer``, ``element``,
        ``uncertainty`` (natural-log entropy of the neighbour label
        distribution), ``decision`` (most frequent neighbour label; ties go to
        the smallest label), ``real`` and ``final_prediction``. When there is
        nothing to process, ``df`` itself is returned.

    Notes
    -----
    The requested range is clamped to ``len(X_test)``, so asking for more
    elements than remain processes only the remaining ones instead of raising
    ``IndexError``.
    """
    first = last_element + 1
    # Clamp the upper bound so a resumed run near (or past) the end of the test set is a no-op
    # for the missing part rather than an IndexError.
    stop = min(first + numElements, len(X_test))
    elements = range(first, stop)
    n_layers = len(model.layers) - 1
    if len(elements) == 0 or n_layers <= 0:
        return df

    # Materialise the selected test samples once instead of re-indexing X_test for every batch.
    queries = X_test[list(elements)]
    true_labels = [np.argmax(y_test[element]) for element in elements]

    # The model's own prediction does not depend on the layer being explained, so it is computed
    # once for the whole selection rather than once per element per layer.
    raw_predictions = np.asarray(model.predict(queries, batch_size=batch_size, verbose=0))
    final_predictions = raw_predictions.reshape(len(queries), -1).argmax(axis=1)

    # Rows are collected in a plain list and turned into a DataFrame once at the end;
    # appending to a DataFrame row by row copies the whole frame each time.
    records = []
    for layer in range(n_layers):
        explainer = fx.FastActivationExplainer(model, X_train, n_layer=layer + 1)
        for offset in range(0, len(elements), batch_size):
            # NOTE(review): `indices` is assumed to hold, per query, the positions of its
            # nearest training samples in X_train/y_train - confirm against fexigo's API.
            indices, distances = explainer.explain(
                queries[offset:offset + batch_size], metric=sp_distance.braycurtis, top_k=k
            )

            for local, element in enumerate(elements[offset:offset + batch_size]):
                neighbour_labels = [np.argmax(y_train[j]) for j in indices[local]]
                classes, counts = np.unique(neighbour_labels, return_counts=True)
                # np.unique only reports labels that occur, so no probability is zero
                # and the logarithm below is always finite.
                probabilities = counts / k
                records.append({
                    'layer': layer,
                    'element': element,
                    'uncertainty': -np.sum(probabilities * np.log(probabilities)),
                    'decision': classes[np.argmax(counts)],
                    'real': true_labels[offset + local],
                    'final_prediction': final_predictions[offset + local],
                })

            del indices, distances
            gc.collect()
        # Drop the explainer before building the one for the next layer.
        del explainer
        gc.collect()

    new_rows = pd.DataFrame(records)
    if len(df) == 0:
        # Skip concatenating an empty frame; just keep its column order.
        columns = list(df.columns) + [c for c in new_rows.columns if c not in df.columns]
        return new_rows.reindex(columns=columns)
    return pd.concat([df, new_rows], ignore_index=True)

2025-02-27 09:57:34.642725: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-02-27 09:57:34.994258: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# MNIST

In [2]:
# Load the MNIST train/test splits and one-hot encode both label vectors over 10 classes.
(X_train, y_train), (X_test, y_test) = datasets.mnist.load_data()
y_train, y_test = (keras.utils.to_categorical(labels, 10) for labels in (y_train, y_test))

# Load the saved MNIST CNN from disk.
model = keras.models.load_model('models/mnist_cnn.keras')

In [None]:
# Resume from a previous run when its CSV exists; otherwise start an empty case database.
try:
    df = pd.read_csv('case_database_mnist.csv')
except FileNotFoundError:
    df = pd.DataFrame(columns=['layer', 'element', 'uncertainty', 'decision', 'real', 'final_prediction'])

# -1 means "nothing processed yet", so extraction starts at test element 0.
if df.empty:
    last_element = -1
else:
    last_element = int(df['element'].max())
print('Last element:', last_element)

Last element: 9999


In [None]:
# Extend the MNIST case database, requesting the 1000 test elements after `last_element`
# with 10 neighbours per element.
df = extract_cases(
    k=10,
    numElements=1000,
    last_element=last_element,
    model=model,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    df=df,
)



In [5]:
# Persist the MNIST case database (index column omitted); this is the file the resume cell reads.
df.to_csv(path_or_buf='case_database_mnist.csv', index=False)

# CIFAR-10

In [2]:
# Load the CIFAR-10 train/test splits and one-hot encode both label vectors over 10 classes.
(X_train, y_train), (X_test, y_test) = datasets.cifar10.load_data()
y_train, y_test = (keras.utils.to_categorical(labels, 10) for labels in (y_train, y_test))

# Load the saved CIFAR-10 CNN from disk.
model = keras.models.load_model('models/cifar10_cnn.keras')

In [8]:
# Resume from a previous run when its CSV exists; otherwise start an empty case database.
try:
    df = pd.read_csv('case_database_cifar.csv')
except FileNotFoundError:
    df = pd.DataFrame(columns=['layer', 'element', 'uncertainty', 'decision', 'real', 'final_prediction'])

# -1 means "nothing processed yet", so extraction starts at test element 0.
if df.empty:
    last_element = -1
else:
    last_element = int(df['element'].max())
print('Last element:', last_element)

Last element: 9999


In [5]:
# Extend the CIFAR-10 case database, requesting the 1000 test elements after `last_element`
# with 10 neighbours per element.
df = extract_cases(
    k=10,
    numElements=1000,
    last_element=last_element,
    model=model,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    df=df,
)



In [26]:
# Persist the CIFAR-10 case database (index column omitted); this is the file the resume cell reads.
df.to_csv(path_or_buf='case_database_cifar.csv', index=False)