In [17]:
import pickle as pkl
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import os
import pickle
import torch
from torchvision import datasets, transforms

In [18]:
# NOTE(review): machine-specific absolute path; the relative './datasets/...'
# paths used further down are resolved against this working directory.
project_root = '/Users/gouse/PycharmProjects/AR-Imperial-Thesis'
os.chdir(project_root)
os.getcwd()

'/Users/gouse/PycharmProjects/AR-Imperial-Thesis'

## Load the data

In [19]:
# Concept names: three measurements, each split into four size bins.
feature_names = [
    "thickness_small", "thickness_medium", "thickness_large", "thickness_xlarge",
    "width_small", "width_medium", "width_large", "width_xlarge",
    "length_small", "length_medium", "length_large", "length_xlarge",
]

# Digit classes, stored as strings; list position is the class index.
class_names = ["6", "8", "9"]

# Lookup tables between positions and names, in both directions.
feature_index_to_name = dict(enumerate(feature_names))
feature_name_to_index = {name: idx for idx, name in feature_index_to_name.items()}
class_index_to_name = dict(enumerate(class_names))
class_name_to_index = {name: idx for idx, name in class_index_to_name.items()}

In [20]:
# Download training and test sets
transform = transforms.Compose([
    transforms.ToTensor(),  # Convert images to PyTorch tensors
    transforms.Normalize((0.5,), (0.5,))  # Normalize the images
])

train_dataset = datasets.MNIST(root='./datasets/MNIST/data', train=True,
                               download=True,
                               transform=transform)
test_dataset = datasets.MNIST(root='./datasets/MNIST/data', train=False,
                              download=True,
                              transform=transform)

# Digits kept for this experiment, in the order their images are stacked in X.
selected_digits = (6, 8, 9)

# Pick the images of each selected digit with a boolean mask over the label
# tensor. This avoids iterating the dataset item by item, which would run
# `transform` on every image only to read its label. The images come from
# `train_dataset.data`, so `transform` is not applied to them.
train_labels = torch.as_tensor(train_dataset.targets)
dict_of_lists = {
    digit: train_dataset.data[train_labels == digit].numpy().reshape(-1, 1, 28, 28)
    for digit in selected_digits
}

# X: all 6s, then all 8s, then all 9s, each block in dataset order.
X = torch.cat([torch.tensor(dict_of_lists[digit]) for digit in selected_digits])
def _load_pickle(path):
    """Open `path` in binary mode and return the unpickled object."""
    # NOTE: pickle can execute arbitrary code; only load files you produced yourself.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# import pickle files
area = _load_pickle('./datasets/MNIST/mine_preprocessed/area_dict.pkl')
length = _load_pickle('./datasets/MNIST/mine_preprocessed/length_dict.pkl')
thickness = _load_pickle('./datasets/MNIST/mine_preprocessed/thickness_dict.pkl')
slant = _load_pickle('./datasets/MNIST/mine_preprocessed/slant_dict.pkl')
width = _load_pickle('./datasets/MNIST/mine_preprocessed/width_dict.pkl')
height = _load_pickle('./datasets/MNIST/mine_preprocessed/height_dict.pkl')

# load the targets test
area_test = _load_pickle('./datasets/MNIST/mine_preprocessed/area_dict_test.pkl')
length_test = _load_pickle('./datasets/MNIST/mine_preprocessed/length_dict_test.pkl')
thickness_test = _load_pickle('./datasets/MNIST/mine_preprocessed/thickness_dict_test.pkl')
slant_test = _load_pickle('./datasets/MNIST/mine_preprocessed/slant_dict_test.pkl')
width_test = _load_pickle('./datasets/MNIST/mine_preprocessed/width_dict_test.pkl')
height_test = _load_pickle('./datasets/MNIST/mine_preprocessed/height_dict_test.pkl')

In [21]:
targets = []
digits_size = 0
labels = []
# Each target row is (thickness, width, length); the other loaded measurements
# (area, slant, height) are not used as targets in this cell.
# k is the class index given to digit i: 6 -> 0, 8 -> 1, 9 -> 2.
for k, i in enumerate([6, 8, 9]):
    targets += list(zip(thickness[i], width[i], length[i]))
    # Every row appended to `targets` for this digit gets the label k.
    labels.append([k] * (len(targets) - digits_size))
    digits_size += len(width[i])

targets = np.array(targets)

def assign_bins(data, bin_edges):
    """Return the bin number of every value in `data`.

    Thin wrapper around ``np.digitize(..., right=True)``: a value ``x`` gets
    number ``k`` when ``bin_edges[k-1] < x <= bin_edges[k]``, so bins are
    closed on the right.
    """
    bin_numbers = np.digitize(data, bin_edges, right=True)
    return bin_numbers

# Convert bin numbers to one-hot encoded values
def one_hot_encode(bin_numbers, num_bins):
    """One-hot encode 1-based bin numbers.

    Parameters
    ----------
    bin_numbers : array-like of int
        Bin numbers in the range ``1..num_bins``.
    num_bins : int
        Number of bins, i.e. the length of each one-hot vector.

    Returns
    -------
    np.ndarray
        Float array with a trailing axis of length ``num_bins``; entry
        ``bin_numbers[r] - 1`` of row ``r`` is 1 and the rest are 0.

    Raises
    ------
    IndexError
        If any bin number is outside ``1..num_bins``. Without this check a
        bin number of 0 would become index -1 and be silently encoded as the
        last bin.
    """
    bin_numbers = np.asarray(bin_numbers)
    if bin_numbers.size and (bin_numbers.min() < 1 or bin_numbers.max() > num_bins):
        raise IndexError(
            f"bin numbers must lie in 1..{num_bins}, "
            f"got values in {bin_numbers.min()}..{bin_numbers.max()}"
        )
    return np.eye(num_bins)[bin_numbers - 1]

def process_data(targets, num_bins=4, num_closest=5):
    """Bin every column of `targets` and collect per-bin statistics.

    For each column, bin edges are read off the sorted values at positions
    ``k * (n_rows // num_bins)`` for ``k = 1..num_bins-1``, with ``-inf`` as
    the lowest edge and the column maximum as the highest. Values are then
    assigned to right-closed bins numbered ``1..num_bins`` with `assign_bins`
    and one-hot encoded with `one_hot_encode`.

    Parameters
    ----------
    targets : np.ndarray
        2-D array; rows are samples, columns are features.
    num_bins : int, default 4
        Number of bins per feature.
    num_closest : int, default 5
        How many row indices to report as closest to each bin's minimum and
        to each bin's maximum.

    Returns
    -------
    bins_data_all : list of np.ndarray
        One one-hot encoded array per feature.
    bins_data_all_indices : dict
        ``{feature index: {bin number: list of row indices in that bin}}``.
    min_max_values_all : list
        Per feature, a list with one ``(min, max)`` tuple per bin;
        ``(None, None)`` for an empty bin.
    closest_images_all : list
        Per feature, a list with one ``(closest_to_min, closest_to_max)``
        tuple of row-index arrays per bin; ``([], [])`` for an empty bin.
        (Presumably the row indices identify images; confirm against caller.)
    bin_counts : list
        Per feature, the number of rows in each bin.
    """
    bins_data_all_indices = {}
    bins_data_all = []
    min_max_values_all = []
    closest_images_all = []
    bin_counts = []

    for i in range(targets.shape[1]):
        feature_values = targets[:, i]

        # Sort a copy of the column so edges can be read off by position
        combined_sorted = np.sort(feature_values)

        # Determine the number of data points per bin
        bin_size = len(combined_sorted) // num_bins

        # Calculate bin edges (edge_idx is kept distinct from the feature index i)
        bin_edges = [combined_sorted[edge_idx * bin_size] for edge_idx in range(1, num_bins)]
        bin_edges = [-np.inf] + bin_edges + [combined_sorted[-1]]

        # Assign bins to the original (unsorted) column
        bins_data = assign_bins(feature_values, bin_edges)

        # Do one-hot encoding in the bins
        bins_data_encoded = one_hot_encode(bins_data, num_bins)

        # Per-bin statistics for this feature
        min_max_values = []
        closest_images = []
        counts = []
        feature_bins_data = {}

        for bin_num in range(1, num_bins + 1):
            bin_indices = np.where(bins_data == bin_num)[0]
            bin_values = targets[bin_indices, i]
            counts.append(len(bin_indices))

            if len(bin_values) > 0:
                min_val = np.min(bin_values)
                max_val = np.max(bin_values)
                min_max_values.append((min_val, max_val))

                # Rows whose value is nearest the bin minimum / maximum
                closest_min_indices = bin_indices[np.argsort(np.abs(bin_values - min_val))[:num_closest]]
                closest_max_indices = bin_indices[np.argsort(np.abs(bin_values - max_val))[:num_closest]]
                closest_images.append((closest_min_indices, closest_max_indices))
            else:
                min_max_values.append((None, None))
                closest_images.append(([], []))

            feature_bins_data[bin_num] = list(bin_indices)

        bins_data_all.append(bins_data_encoded)
        bins_data_all_indices[i] = feature_bins_data
        min_max_values_all.append(min_max_values)
        closest_images_all.append(closest_images)
        bin_counts.append(counts)

    return bins_data_all, bins_data_all_indices, min_max_values_all, closest_images_all, bin_counts

# Bin the measurement targets and report, per feature and bin, the value range
# and the rows closest to each end of that range.
num_bins = 4
(bins_data_all,
 bins_data_all_indices,
 min_max_values_all,
 closest_images_all,
 bin_counts) = process_data(targets, num_bins=num_bins)

# Output the results
for feature_idx in range(targets.shape[1]):
    print(f"Feature {feature_idx}")
    feature_closest = closest_images_all[feature_idx]
    for bin_idx, (min_val, max_val) in enumerate(min_max_values_all[feature_idx]):
        near_min, near_max = feature_closest[bin_idx]
        print(f"  Bin {bin_idx + 1}: Min = {min_val}, Max = {max_val}")
        print(f"    Closest to Min: {near_min}")
        print(f"    Closest to Max: {near_max}")


Feature 0
  Bin 1: Min = 1.0608199852609512, Max = 2.0955714117145585
    Closest to Min: [ 5950  6513 16167  1617  8621]
    Closest to Max: [ 5724 15599 12669 15554 15557]
  Bin 2: Min = 2.095583355223437, Max = 2.455555185927587
    Closest to Min: [  157 11735  2878 11170  5908]
    Closest to Max: [10186    92 12276  7142 11892]
  Bin 3: Min = 2.455627281914512, Max = 2.901642846528742
    Closest to Min: [9156 6174 7889 4619 3771]
    Closest to Max: [  244   426  4118  5549 12352]
  Bin 4: Min = 2.9016807521673793, Max = 9.53389237525011
    Closest to Min: [17317  8534  9637 12831 12134]
    Closest to Max: [ 6880 11069  6466  8310 10537]
Feature 1
  Bin 1: Min = 5.381124287425585, Max = 10.939789083379203
    Closest to Min: [12129 10069  1848  2104  1426]
    Closest to Max: [  295 14518 15892 14841 10100]
  Bin 2: Min = 10.940738953737265, Max = 12.576095107156878
    Closest to Min: [11208  4297  6288  6421 12348]
    Closest to Max: [11690 16675 17656 10882 16855]
  Bin 3:

In [22]:
# Concept matrix: one row per sample, the per-feature one-hot blocks laid side by side.
n_concept_columns = num_bins * targets.shape[1]
C = np.stack(bins_data_all, axis=1).reshape(-1, n_concept_columns)
# Flatten the per-digit label lists into one label vector.
y = np.array([label for per_digit_labels in labels for label in per_digit_labels])
np.random.seed(42)

from sklearn.model_selection import train_test_split
# Split the data
def train_test_split_with_indices(*arrays, **options):
    """Split several arrays consistently and also return the split indices.

    Recognised keyword options are ``test_size``, ``train_size``,
    ``random_state``, ``shuffle`` (default True), ``stratify`` and
    ``indices``; the first five are forwarded to ``train_test_split``.
    Any other keyword is ignored.

    ``indices`` defaults to ``np.arange(arrays[0].shape[0])``. It is the
    sequence that actually gets split, and the two resulting parts are used
    to index every array, so its values must be valid row positions.

    Returns a flat list ``[a0_train, a0_test, a1_train, a1_test, ...,
    train_indices, test_indices]``.
    """
    indices = options.pop('indices', None)
    split_kwargs = {
        'test_size': options.pop('test_size', None),
        'train_size': options.pop('train_size', None),
        'random_state': options.pop('random_state', None),
        'shuffle': options.pop('shuffle', True),
        'stratify': options.pop('stratify', None),
    }

    # Row count is taken from the first array
    n_samples = arrays[0].shape[0]
    if indices is None:
        indices = np.arange(n_samples)

    # Split the indices, not the data, so every array is cut the same way
    train_indices, test_indices = train_test_split(indices, **split_kwargs)

    result = []
    for array in arrays:
        result.extend((array[train_indices], array[test_indices]))
    result.extend((train_indices, test_indices))

    return result

def _as_tensor_copy(data, dtype):
    """Return a new tensor of `dtype` holding a copy of `data`.

    An existing tensor is copied with ``detach().clone()``. Passing a tensor
    to ``torch.tensor`` gives the same result but emits a UserWarning.
    """
    if isinstance(data, torch.Tensor):
        return data.detach().clone().to(dtype)
    return torch.tensor(data, dtype=dtype)


# First split: 50% train, 50% hold-out. `train_indices` are row positions in X / C / y.
X_train, X_val, C_train, C_val, y_train, y_val, train_indices, val_indices = train_test_split_with_indices(
    X, C, y,
    test_size=0.5,
    random_state=42)

train_index_to_or_index = {i: original_idx for i, original_idx in enumerate(train_indices)}
train_or_index_to_index = {original_idx: i for i, original_idx in enumerate(train_indices)}

# Second split: the hold-out half is divided evenly into validation and test.
# NOTE(review): no `indices` are passed here, so the `val_indices` and
# `test_indices` returned below are positions within the hold-out set from
# the first split, not row positions in X / C / y (unlike `train_indices`).
X_val, X_test, C_val, C_test, y_val, y_test, val_indices, test_indices = train_test_split_with_indices(
    X_val, C_val, y_val,
    test_size=0.5,
    random_state=42)

# Convert to PyTorch tensors
X_train = _as_tensor_copy(X_train, torch.float32)
C_train = _as_tensor_copy(C_train, torch.float32)
y_train = _as_tensor_copy(y_train, torch.long)

X_val = _as_tensor_copy(X_val, torch.float32)
C_val = _as_tensor_copy(C_val, torch.float32)
y_val = _as_tensor_copy(y_val, torch.long)

X_test = _as_tensor_copy(X_test, torch.float32)
C_test = _as_tensor_copy(C_test, torch.float32)
y_test = _as_tensor_copy(y_test, torch.long)

  X_train = torch.tensor(X_train, dtype=torch.float32)
  X_val = torch.tensor(X_val, dtype=torch.float32)
  X_test = torch.tensor(X_test, dtype=torch.float32)


#### Load test accuracies per leaf

In [23]:
# Directory holding the pickled per-leaf results that the next cell loads.
# NOTE(review): absolute, machine-specific path.
output_path = "/Users/gouse/PycharmProjects/AR-Imperial-Thesis/logs_and_models_to_show/analysis_notebooks/completeness_scores/CBM_model_new"

In [24]:
# Locations of the three pickles inside the results directory
original_accuracy_file = os.path.join(output_path, 'accuracy_per_original_path_dict.pkl')
new_accuracy_file = os.path.join(output_path, 'accuracy_per_new_path_dict.pkl')
leaf_indices_file = os.path.join(output_path, 'leaf_samples_indices.pkl')

with open(original_accuracy_file, 'rb') as f:
    accuracy_per_original_path_dict = pkl.load(f)

with open(new_accuracy_file, 'rb') as f:
    accuracy_per_new_path_dict = pkl.load(f)

with open(leaf_indices_file, 'rb') as f:
    leaf_samples_indices = pkl.load(f)

In [25]:
accuracy_per_original_path_dict

{4: 0.6076923076923076,
 6: 0.3469387755102041,
 7: 0.573170731707317,
 10: 0.4878048780487805,
 11: 0.46,
 12: 0.64,
 15: 0.496996996996997,
 16: 0.4946236559139785,
 17: 0.9591836734693877,
 18: 0.7394757744241461}

In [26]:
accuracy_per_new_path_dict

{4: 0.6076923076923076,
 6: 0.47619047619047616,
 7: 0.573170731707317,
 10: 0.4878048780487805,
 11: 0.46,
 12: 0.64,
 15: 0.496996996996997,
 16: 0.4946236559139785,
 17: 0.9591836734693877,
 18: 0.7942811755361397}

In [27]:
leaf_samples_indices

{4: array([   5,    7,    9,   13,   19,   24,   32,   33,   36,   39,   41,
          44,   47,   58,   62,   64,   68,   74,   76,   83,   84,   86,
          92,   98,   99,  106,  115,  119,  120,  125,  128,  131,  134,
         135,  142,  143,  151,  153,  156,  158,  162,  178,  181,  182,
         196,  214,  216,  228,  229,  234,  235,  237,  239,  241,  245,
         248,  249,  251,  255,  256,  268,  272,  276,  278,  281,  284,
         286,  288,  290,  307,  311,  317,  318,  319,  321,  322,  327,
         330,  339,  341,  361,  363,  364,  366,  371,  377,  388,  389,
         391,  393,  394,  403,  404,  409,  410,  421,  424,  428,  436,
         453,  455,  456,  476,  479,  481,  483,  491,  493,  503,  504,
         512,  517,  518,  536,  538,  541,  543,  547,  553,  554,  560,
         565,  570,  574,  581,  587,  593,  594,  598,  606,  609,  610,
         614,  616,  620,  639,  642,  644,  646,  656,  663,  665,  684,
         686,  688,  699,  730,  74

#### Load test accuracies per leaf for the blackbox x->y model

In [28]:
# Pickle file that the next cell opens and loads into test_pred_correct_or_not_xtoy.
# NOTE(review): this rebinds `output_path`, which earlier held a results
# directory, to a single file; the path is absolute and machine-specific.
output_path = "/Users/gouse/PycharmProjects/AR-Imperial-Thesis/logs_and_models_to_show/analysis_notebooks/completeness_scores/blackbox_model/test_pred_correct_or_not_xtoy.pkl"

In [29]:
# Load the per-sample correctness flags of the blackbox model on the test set
with open(output_path, 'rb') as f:
    test_pred_correct_or_not_xtoy = pkl.load(f)

n_test_samples = len(test_pred_correct_or_not_xtoy)
print("Length of test dataset: ", n_test_samples)

# Overall accuracy in percent: share of entries flagged as correct
acc = test_pred_correct_or_not_xtoy.sum() * 100/n_test_samples
print("Accuracy of the blackbox model: ", acc.item())

Length of test dataset:  4430
Accuracy of the blackbox model:  99.50338745117188


In [30]:
# Compute the accuracy of the blackbox model per leaf: the share of correct
# predictions among the sample indices stored for that leaf.
accuracy_per_path_blackbox_dict = {}
for path, indices in leaf_samples_indices.items():
    n_correct = test_pred_correct_or_not_xtoy[indices].sum()
    accuracy_per_path_blackbox_dict[path] = (n_correct/len(indices)).item()

accuracy_per_path_blackbox_dict

{4: 0.9987179636955261,
 6: 0.9931972622871399,
 7: 0.9959349632263184,
 10: 1.0,
 11: 0.9933333396911621,
 12: 0.9933333396911621,
 15: 0.9969969987869263,
 16: 0.9946236610412598,
 17: 0.9959183931350708,
 18: 0.9912629127502441}

#### Compute the test completeness scores

In [31]:
# Chance level used as the baseline in the completeness score
acuracy_of_random_guessing = 1/3


def _completeness_score(path_accuracy, blackbox_accuracy):
    """Return (path accuracy - chance) / (blackbox accuracy - chance)."""
    gain_over_chance = path_accuracy - acuracy_of_random_guessing
    blackbox_gain_over_chance = blackbox_accuracy - acuracy_of_random_guessing
    return gain_over_chance / blackbox_gain_over_chance


# Scores for the original paths
completeness_scores_per_original_path = {}
for path, path_accuracy in accuracy_per_original_path_dict.items():
    completeness_scores_per_original_path[path] = _completeness_score(
        path_accuracy, accuracy_per_path_blackbox_dict[path])

# Scores for the new paths
completeness_scores_per_new_path = {}
for path, path_accuracy in accuracy_per_new_path_dict.items():
    completeness_scores_per_new_path[path] = _completeness_score(
        path_accuracy, accuracy_per_path_blackbox_dict[path])

In [32]:
# Report the completeness score of every original path, then of every new path
for path, score in completeness_scores_per_original_path.items():
    print(f"Original path: {path}, Completeness score: {score}")

for path, score in completeness_scores_per_new_path.items():
    print(f"New path: {path}, Completeness score: {score}")

Original path: 4, Completeness score: 0.4123313972696226
Original path: 6, Completeness score: 0.02061855722048902
Original path: 7, Completeness score: 0.3619631880662883
Original path: 10, Completeness score: 0.23170731707317072
Original path: 11, Completeness score: 0.19191919007042021
Original path: 12, Completeness score: 0.46464646017049094
Original path: 15, Completeness score: 0.24660633417652114
Original path: 16, Completeness score: 0.24390243713330498
Original path: 17, Completeness score: 0.944558484797899
Original path: 18, Completeness score: 0.6173038176072825
New path: 4, Completeness score: 0.4123313972696226
New path: 6, Completeness score: 0.21649485081513445
New path: 7, Completeness score: 0.3619631880662883
New path: 10, Completeness score: 0.23170731707317072
New path: 11, Completeness score: 0.19191919007042021
New path: 12, Completeness score: 0.46464646017049094
New path: 15, Completeness score: 0.24660633417652114
New path: 16, Completeness score: 0.243902437