Research question 1: Do concept-based explanations produce more faithful explanations than feature attribution methods?

In [1]:
pip install -r requirements.txt


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.10 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


Step 0:Setup

In [6]:
import numpy as np
import pickle
import hashlib
import pandas as pd

np.random.seed(42)

base_path = "/Users/karlgustav/Documents/GitHub/study/master-thesis/server/src/research/data/"
# base_path = "/Users/karl-gustav.kallasmaa/Documents/Projects/master-thesis/server/src/"
masks_path = f"{base_path}masks.pkl"
img_path = f"{base_path}resized_imgs.pkl"
labels_path = f"{base_path}classes.pkl"
ade_path = f"{base_path}objectInfo150.csv"

labels = []
images = []
masks = []
with open(masks_path, 'rb') as f:
    masks = pickle.load(f)
with open(img_path, 'rb') as f:
    images = pickle.load(f)
with open(labels_path, 'rb') as f:
    labels = np.array(pickle.load(f))


ade_classes = pd.read_csv(ade_path)

image_hex_index_map = {hashlib.sha1(np.array(img).view(np.uint8)).hexdigest(): i for i,img in enumerate(images)}

index_img_map = {i:img for i,img in enumerate(images)}
index_label_map = {i:label for i,label in enumerate(labels)}
index_mask_map = {i:mask for i,mask in enumerate(masks)}
index_ade_map = {i:ade for i,ade in enumerate(ade_classes)}

random_indexes = np.random.choice(list(index_img_map.keys()), int(0.1*len(index_img_map.keys())), replace=False)

random_images = [index_img_map[index] for index in random_indexes]
random_labels = [index_img_map[index] for index in random_indexes]
random_masks = [index_mask_map[index] for index in random_indexes]

In [8]:
from typing import List
from sklearn import preprocessing

def encode_categorical_values(values: List[str]):
    unique_values = sorted(list(set(values)))
    le = preprocessing.LabelEncoder()
    le.fit(unique_values)
    return le

label_encoder = encode_categorical_values(labels)

Step 1: Get lime predictions

In [9]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import decode_predictions

blackbox_model = ResNet50(weights='imagenet', include_top=True, input_shape=(224, 224, 3))

def preprocess_image(image):
    image = tf.image.resize(image, (224, 224))
    image = tf.keras.applications.resnet50.preprocess_input(np.array(image))
    return image

def black_box_classify(img_array):
    predictions = []
    for i in img_array:
        img = preprocess_image(i)
        prediction = blackbox_model.predict(img[np.newaxis, ...])
        print(prediction)
        prediction_label = decode_predictions(prediction, top = 1)
        print(prediction_label)
        prediction_label = prediction_label[0][0][1]
        print(prediction_label)
        predictions.append(prediction_label)
    return predictions

In [11]:
from lime import lime_image

def explain_with_lime(images, num_samples=1000, num_features=10, hide_color=None):
    explainer = lime_image.LimeImageExplainer()
    explanations = []
    for image in images:
        # Reshape the image to (height, width, channels) format
        # image_reshaped = image.reshape(image.shape[:-1])
        # Explain the image predictions using LIME
        explanation = explainer.explain_instance(np.array(image),
                                                 classifier_fn=black_box_classify,
                                                 top_labels=1,
                                                 hide_color=hide_color,
                                                 num_samples=num_samples,
                                                 num_features=num_features)
        explanations.append(explanation)
    return explanations

lime_predictions = explain_with_lime(random_images[0:10])

  0%|          | 0/1000 [00:00<?, ?it/s]

[[2.85903532e-08 3.15815143e-08 1.11002248e-06 5.45507419e-06
  1.32178343e-06 1.54394769e-07 3.66842301e-07 3.26169136e-08
  1.43573358e-08 2.66573284e-06 4.03266887e-09 1.33449367e-08
  3.99605611e-08 7.94513610e-08 1.78964381e-08 7.68009585e-08
  2.95768956e-08 3.64144981e-08 2.13381782e-07 1.71590475e-08
  7.16001551e-08 1.12918102e-07 1.60743710e-07 5.57525539e-07
  6.80052548e-09 1.02555248e-07 1.04369491e-08 1.12437228e-07
  1.66774239e-09 8.83070328e-08 3.50071936e-08 2.80194588e-08
  4.30709974e-08 1.76929170e-04 6.46758592e-04 9.61112150e-08
  2.22882454e-06 7.27254772e-08 4.47403039e-08 4.67278551e-06
  1.63063945e-08 8.55679190e-08 5.14836955e-08 8.06309970e-08
  1.75011081e-07 6.17611207e-08 9.21602634e-08 1.35467030e-06
  5.67343470e-08 1.59658589e-06 1.39224561e-08 6.43186970e-09
  1.06732870e-07 1.23365886e-08 8.48687080e-07 1.49192747e-08
  4.60283722e-08 1.58808806e-08 5.84081761e-07 3.19320549e-07
  1.34893540e-07 5.09190379e-08 9.20318612e-08 1.23480589e-07
  2.4268

Step 2: Get concept-based desision tree explanations

In [None]:
#TODO: fix this
MOST_POPULAR_CONCEPTS = {}

In [11]:
from typing import List,Tuple
from sklearn import preprocessing
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier


def get_segment_relative_size(segment: np.array, picture: np.array) -> float:
    segment_area = float(segment.shape[0] * segment.shape[1])
    picture_area = float(picture.shape[0] * picture.shape[1])
    return round(segment_area / picture_area, 2)

def get_segments(img, mask, threshold=0.05):
    segs = np.unique(mask)
    segments = []
    total = mask.shape[0] * mask.shape[1]
    segments_classes = []
    for seg in segs:
        idxs = mask == seg
        sz = np.sum(idxs)
        if sz < threshold * total:
            continue
        segment = img * idxs[..., None]
        w, h, _ = np.nonzero(segment)
        segment = segment[np.min(w):np.max(w), np.min(h):np.max(h), :]
        segments.append(segment)
        segments_classes.append(ade_classes['Name'].loc[ade_classes['Idx'] == seg].iloc[0])
    return segments, segments_classes

def get_training_row(user_selected_concepts: List[str], pic, mask) -> np.array:
    row = np.zeros(len(user_selected_concepts))
    pic_as_array = np.array(pic)
    segss, seg_class = get_segments(pic_as_array, mask, threshold=0.005)
    for index, el in enumerate(user_selected_concepts):
        if el in seg_class:
            segment = segss[seg_class.index(el)]
            row[index] = get_segment_relative_size(segment, pic_as_array)
    return row

def train_and_test_decision_tree(x, y) -> Tuple[DecisionTreeClassifier, float]:
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    return clf, clf.score(X_test, y_test)

SyntaxError: incomplete input (1518711617.py, line 1)

In [None]:
def train_concept_explainer(all_labels,all_images,all_masks):
    X, y = [], []
    for label, pic, mask in zip(all_labels,all_images, all_masks):
        most_popular_concepts_for_label = MOST_POPULAR_CONCEPTS[label]
        row = get_training_row(most_popular_concepts_for_label, pic, mask)
        label_as_nr = label_encoder.transform([label])
        X.append(row)
        y.append(label_as_nr)
    clf, accuracy = train_and_test_decision_tree(np.array(X), np.array(y))
    return clf

In [None]:
def explain_with_concepts(images,model):
    predictions = []
    for img in images:
        img_key = hashlib.sha1(np.array(img).view(np.uint8)).hexdigest()
        image_index = image_hex_index_map[img_key]
        image_label = index_label_map[image_index]

        most_popular_concepts_for_label = MOST_POPULAR_CONCEPTS[image_label]
        mask = index_mask_map[image_index]
        
        row = get_training_row(most_popular_concepts_for_label, img, mask)
        prediction_as_nr = model.predict([row])
        prediction_as_label = label_encoder.inverse_transform(prediction_as_nr)
        predictions.append(prediction_as_label)
    return predictions

concept_model = train_concept_explainer(labels,images,masks)
concept_predictions = explain_with_concepts(random_images,concept_model)

Step 3: calculate fidelity

In [12]:
def black_box_models_predictions(images):
    predictions = []
    for img in images:
        img_key = hashlib.sha1(np.array(img).view(np.uint8)).hexdigest()
        image_index = image_hex_index_map[img_key]
        image_label = index_label_map[image_index]

        predictions.append(image_label)
    return predictions

def fidelity(pred1,pred2):
    same = 0
    not_same = 0
    for p1 in pred1:
        for p2 in pred2:
            if p1 == p2:
                same += 1
            else:
                not_same += 1
    return same / not_same


black_box_pred = black_box_models_predictions(random_images)

lime_fidelity = fidelity(pred1=lime_predictions,pred2=black_box_pred)
concept_fidelity = fidelity(pred1=concept_predictions,pred2=black_box_pred)


print("LIME fidelity "+lime_fidelity)
print("Concept fidelity"+concept_fidelity)
if lime_fidelity > concept_fidelity:
    diff = lime_fidelity - concept_fidelity
    print("LIME fidelity is greater than concept fidelity by "+diff)
else:
    diff = concept_fidelity -lime_fidelity
    print("Concept fidelity is creater than LIME fidelity by "+diff)

NameError: name 'random_images' is not defined