In [1]:
import os
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import timm
import torch
import torch.nn as nn

from scipy.spatial import distance
import seaborn as sns

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def disable_module(module):
    for p in module.parameters():
        p.requires_grad = False
        
def enable_module(module):
    for p in module.parameters():
        p.requires_grad = True


def check_tunable_params(model, verbose=True):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    
    for name, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            if(verbose):
                print(name)
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param:.5f}"
    )

    return trainable_params, all_param

def create_mapping(model, vector):
    mapping = {}
    i = 0

    for name_p,p in model.named_parameters():
        if '.attn.' in name_p or 'attention' in name_p:
            mapping[name_p] = vector[i]
            i += 1
        else:
            p.requires_grad = False
            
    return mapping

def sort_dict(dict, descending=False):
    sorted_dict = dict(sorted(dict.items(), key=lambda item: item[1], reverse=descending))
    
    return sorted_dict

def get_modules_from_vector(vector, model):
    trainable_blocks = []
    frozen_blocks = []
    
    trainable_blocks = np.where(np.array(vector) == 1)
    frozen_blocks = np.where(np.array(vector) == 0)
    
    return trainable_blocks, frozen_blocks

def get_model_for_bitfit(model):
    trainable_components = ['bias', 'pooler.dense.bias', 'head'] 

    # Disale all the gradients
    for param in model.parameters():
        param.requires_grad = False 
      
    vector = []

    for name, param in model.named_parameters():
        for component in trainable_components:
            if component in name:
                vector.append(1)
                param.requires_grad = True
                break
    
    return vector

def enable_from_vector(vector, model):
    print("Vector: ", vector)
    
    disable_module(model)
    
    for idx, block in enumerate(model.blocks): 
    
        if(vector[idx] == 1):
            print("Enabling attention in Block {}".format(idx))
            enable_module(block.attn)
        else:
            #print("Disabling attention in Block {}".format(idx))
            disable_module(block.attn)

def create_best_worst_vectors(df, k=10):
    
    best_df = df.sort_values(by=['Test Acc@1'], ascending=False).head(k).reset_index(drop=True)
    worst_df = df.sort_values(by=['Test Acc@1'], ascending=True).head(k).reset_index(drop=True)

    best_vector = np.array([0]*12)

    for i in range(len(best_df)):
        vector_path = best_df['Vector Path'][i]
        vector = np.load(vector_path)
        best_vector += vector

    worst_vector = np.array([0]*12)

    for i in range(len(worst_df)):
        vector_path = worst_df['Vector Path'][i]
        vector = np.load(vector_path)
        worst_vector += vector

    return best_vector, worst_vector

def tune_blocks_random(model, mask, segment):

    vector = []

    for idx, block in enumerate(model.blocks):

        if(mask is None):
            bit = int(np.random.random(1)[0] > 0.5)
        else:
            bit = mask[idx]

        if(bit == 1):
            print("Enabling {} in Block {}".format(segment, idx))
            if(segment == 'attention'):
                enable_module(block.attn)
            elif(segment == 'layernorm'):
                enable_module(block.norm1)
                enable_module(block.norm2)

            vector.append(1)
        else:
            print("Disabling {} in Block {}".format(segment, idx))
            if(segment == 'attention'):
                disable_module(block.attn)
            elif(segment == 'layernorm'):
                disable_module(block.norm1)
                disable_module(block.norm2)
            
            vector.append(0)
    
    if(mask is not None):
        assert (mask == vector)
        
    return vector

In [6]:
def create_random_mask(mask_length):
    #return np.random.randint(low=0, high=2, size=mask_length)
    return nn.Parameter(torch.randint(low=0, high=2, size=(mask_length,), dtype=torch.float32), requires_grad=True)

mask = create_random_mask(12)
mask

Parameter containing:
tensor([0., 1., 1., 0., 1., 1., 1., 1., 0., 1., 1., 1.], requires_grad=True)

In [9]:
val_loss = torch.tensor([0.5], requires_grad=True)
val_loss



In [None]:
# model = timm.create_model('vit_base_patch16_224', pretrained=False)
# num_blocks = len(model.blocks)

# disable_module(model)

# mask = list(np.random.randint(low=0, high=2, size=num_blocks*4))
# attn_params = []

# for name_p,p in model.named_parameters():
#     if '.attn.' in name_p or 'attention' in name_p:
#         attn_params.append(p)

# attn_params = [p for name_p, p in model.named_parameters() if '.attn.' in name_p or 'attention' in name_p]

# for idx, p in enumerate(attn_params):
#     if(mask[idx] == 1):
#         p.requires_grad = True
#     else:
#         p.requires_grad = False

# check_tunable_params(model, True)


In [None]:
path = "/home/co-dutt1/rds/hpc-work/Layer-Masking/Experiment_Vectors/"
num_blocks = 12
# for i in range(1, 76):
#     vector = np.random.randint(low=0, high=2, size=num_blocks)
#     np.save(path + "random_vector_{}.npy".format(i), vector)

# 76th vector is for tuning all attention blocks
# vector = np.array([1]*12)
# np.save(path + "random_vector_{}.npy".format(76), vector)

# PARAMETER LEVEL

# path = "/home/co-dutt1/rds/hpc-work/Layer-Masking/Experiment_Vectors_Parameter/"
# for i in range(1, 76):
#     vector = np.random.randint(low=0, high=2, size=num_blocks*4)
#     np.save(path + "random_vector_{}.npy".format(i), vector)

# Random Attention Tuning (Block Level)

### BreastUS Dataset

In [None]:
base_path = '/home/co-dutt1/rds/hpc-work/Layer-Masking/'
csv = base_path + 'vit_base/breastUS/' + '.csv'

df = pd.read_csv(csv)
#df['Vector Path'] = df['Vector Path'].apply(lambda x: os.path.join(base_path, x.split('/')[-1]))
df.head()

In [None]:
mean_acc = df['Test Acc@1'].mean()
std_acc = df['Test Acc@1'].std()
max_acc = df['Test Acc@1'].max()
min_acc = df['Test Acc@1'].min()
avg_train_percent = df['Train Percent'].mean()
diff = max_acc - min_acc
best_train_percent = df[df['Test Acc@1'] == max_acc]['Train Percent'].values[0]

print("Mean Acc: ", mean_acc)
print("Std Acc: ", std_acc)
print("Max Acc: ", max_acc)
print("Min Acc: ", min_acc)
print("Avg Train Percent: ", avg_train_percent)
print("Best Performance Train Percent: ", best_train_percent)
print("Diff: ", diff)

In [None]:
# lower_threshold = df['Test Acc@1'].quantile(0.10)
# upper_threshold = df['Test Acc@1'].quantile(0.90)

# top_1_percent = df[df['Test Acc@1'] >= upper_threshold].reset_index(drop=True)
# bottom_1_percent = df[df['Test Acc@1'] <= lower_threshold].reset_index(drop=True)

# bottom_1_percent

k = 10
best_df = df.sort_values(by=['Test Acc@1'], ascending=False).head(k).reset_index(drop=True)
worst_df = df.sort_values(by=['Test Acc@1'], ascending=True).head(k).reset_index(drop=True)

print(len(best_df), len(worst_df))
best_df

In [None]:
best_vector = np.array([0]*12)

for i in range(len(best_df)):
    vector_path = best_df['Vector Path'][i]
    vector = np.load(vector_path)
    best_vector += vector

worst_vector = np.array([0]*12)

for i in range(len(worst_df)):
    vector_path = worst_df['Vector Path'][i]
    vector = np.load(vector_path)
    worst_vector += vector

best_vector, worst_vector

### How many times was each block trained during 50 runs?

In [None]:
# See which blocks were activated the maximum number of times

sum_vec = np.array([0]*12)

for i in range(len(df)):
    vec = np.load(df['Vector Path'][i])
    sum_vec += vec

In [None]:
indices = np.arange(len(sum_vec))

plt.figsize=(20, 10)
# Plot the bar graph
plt.bar(indices, sum_vec)

# Add labels and title
plt.xlabel('Block Index')
plt.ylabel('Number of Times Trainable')
plt.xticks(list(range(0,12)))
plt.yticks(list(range(0, max(sum_vec)+1)))
plt.title('Bar Plot of the number of times each attention block was activated (in 50 runs) in a ViT-Base Model.')

# Show the plot
plt.tight_layout()
plt.savefig("Random_Attention_Block_Tuning_breastUS.png")

In [None]:
indices = np.arange(len(best_vector))

plt.figsize=(20, 10)
# Plot the bar graph
plt.bar(indices, best_vector, label='Best')
plt.bar(indices, worst_vector, label='Worst')
plt.legend()

# Add labels and title
plt.xlabel('Block Index')
plt.ylabel('Block Selection Count')
plt.xticks(list(range(0,12)))
plt.yticks(list(range(0, max(best_vector)+1)))
plt.title('Comparing the attention block selection frequency for best and worst performing vectors.')

# Show the plot
plt.tight_layout()
plt.savefig("Selection_Comparison_BreastUS.png")

We see that the best performing vectors tune later blocks more than the worst performing vectors.

### FitzPatrick Dataset

In [None]:
dataset = 'fitzpatrick'
base_path = '/home/co-dutt1/rds/hpc-work/Layer-Masking/'
csv = base_path + 'vit_base/' + dataset + '/tune_attention_blocks_random_vit_base_0.0001.csv'

df = pd.read_csv(csv)
#df['Vector Path'] = df['Vector Path'].apply(lambda x: os.path.join(base_path, x.split('/')[-1]))
df.head()

In [None]:
mean_acc = df['Test Acc@1'].mean()
std_acc = df['Test Acc@1'].std()
max_acc = df['Test Acc@1'].max()
min_acc = df['Test Acc@1'].min()
avg_train_percent = df['Train Percent'].mean()
best_train_percent = df[df['Test Acc@1'] == max_acc]['Train Percent'].values[0]
diff = max_acc - min_acc

print("Mean Acc: ", mean_acc)
print("Std Acc: ", std_acc)
print("Max Acc: ", max_acc)
print("Min Acc: ", min_acc)
print("Avg Train Percent: ", avg_train_percent)
print("Best Performance Train Percent: ", best_train_percent)
print("Difference (Max, Min): ", diff)

In [None]:
k = 10
best_df = df.sort_values(by=['Test Acc@1'], ascending=False).head(k).reset_index(drop=True)
worst_df = df.sort_values(by=['Test Acc@1'], ascending=True).head(k).reset_index(drop=True)

print(len(best_df), len(worst_df))
best_df

In [None]:
best_vector = np.array([0]*12)

for i in range(len(best_df)):
    vector_path = best_df['Vector Path'][i]
    vector = np.load(vector_path)
    best_vector += vector

worst_vector = np.array([0]*12)

for i in range(len(worst_df)):
    vector_path = worst_df['Vector Path'][i]
    vector = np.load(vector_path)
    worst_vector += vector

best_vector, worst_vector

### How many times was each block selected?

In [None]:
# See which blocks were activated the maximum number of times

sum_vec = np.array([0]*12)

for i in range(len(df)):
    vec = np.load(df['Vector Path'][i])
    sum_vec += vec

In [None]:
indices = np.arange(len(sum_vec))

plt.figsize=(20, 10)
# Plot the bar graph
plt.bar(indices, sum_vec)

# Add labels and title
plt.xlabel('Block Index')
plt.ylabel('Number of Times Trainable')
plt.xticks(list(range(0,12)))
plt.yticks(list(range(0, max(sum_vec)+1)))
plt.title('Bar Plot of the number of times each attention block was activated (in 50 runs) in a ViT-Base Model.')

# Show the plot
plt.tight_layout()
plt.savefig("Random_Attention_Block_Tuning_{}.png".format(dataset))

In [None]:
indices = np.arange(len(best_vector))

plt.figsize=(20, 10)
# Plot the bar graph
plt.bar(indices, best_vector, label='Best')
plt.bar(indices, worst_vector, label='Worst')
plt.legend()

# Add labels and title
plt.xlabel('Block Index')
plt.ylabel('Block Selection Count')
plt.xticks(list(range(0,12)))
plt.yticks(list(range(0, max(best_vector)+1)))
plt.title('Comparing the attention block selection frequency for best and worst performing vectors.')

# Show the plot
plt.tight_layout()
plt.savefig("Selection_Comparison_{}.png".format(dataset))

### SMDG Dataset

In [None]:
dataset = 'smdg'
base_path = '/home/co-dutt1/rds/hpc-work/Layer-Masking/'
csv = base_path + 'vit_base/' + dataset + '/tune_attention_blocks_random_vit_base_0.0001.csv'

df = pd.read_csv(csv)
#df['Vector Path'] = df['Vector Path'].apply(lambda x: os.path.join(base_path, x.split('/')[-1]))
df.head()

In [None]:
mean_acc = df['Test Acc@1'].mean()
std_acc = df['Test Acc@1'].std()
max_acc = df['Test Acc@1'].max()
min_acc = df['Test Acc@1'].min()
avg_train_percent = df['Train Percent'].mean()
best_train_percent = df[df['Test Acc@1'] == max_acc]['Train Percent'].values[0]
diff = max_acc - min_acc

print("Mean Acc: ", mean_acc)
print("Std Acc: ", std_acc)
print("Max Acc: ", max_acc)
print("Min Acc: ", min_acc)
print("Avg Train Percent: ", avg_train_percent)
print("Best Performance Train Percent: ", best_train_percent)
print("Difference (Max, Min): ", diff)

In [None]:
k = 10
best_df = df.sort_values(by=['Test Acc@1'], ascending=False).head(k).reset_index(drop=True)
worst_df = df.sort_values(by=['Test Acc@1'], ascending=True).head(k).reset_index(drop=True)

print(len(best_df), len(worst_df))
best_df

In [None]:
best_vector = np.array([0]*12)

for i in range(len(best_df)):
    vector_path = best_df['Vector Path'][i]
    vector = np.load(vector_path)
    best_vector += vector

worst_vector = np.array([0]*12)

for i in range(len(worst_df)):
    vector_path = worst_df['Vector Path'][i]
    vector = np.load(vector_path)
    worst_vector += vector

best_vector, worst_vector

### How many times was each block selected?


In [None]:
# See which blocks were activated the maximum number of times

sum_vec = np.array([0]*12)

for i in range(len(df)):
    vec = np.load(df['Vector Path'][i])
    sum_vec += vec

In [None]:
indices = np.arange(len(sum_vec))

plt.figsize=(20, 10)
# Plot the bar graph
plt.bar(indices, sum_vec)

# Add labels and title
plt.xlabel('Block Index')
plt.ylabel('Number of Times Trainable')
plt.xticks(list(range(0,12)))
plt.yticks(list(range(0, max(sum_vec)+1)))
plt.title('Bar Plot of the number of times each attention block was activated (in 50 runs) in a ViT-Base Model.')

# Show the plot
plt.tight_layout()
plt.savefig("Random_Attention_Block_Tuning_{}.png".format(dataset))

In [None]:
indices = np.arange(len(best_vector))

plt.figsize=(20, 10)
# Plot the bar graph
plt.bar(indices, best_vector, label='Best')
plt.bar(indices, worst_vector, label='Worst')
plt.legend()

# Add labels and title
plt.xlabel('Block Index')
plt.ylabel('Block Selection Count')
plt.xticks(list(range(0,12)))
plt.yticks(list(range(0, max(best_vector)+1)))
plt.title('Comparing the attention block selection frequency for best and worst performing vectors.')

# Show the plot
plt.tight_layout()
plt.savefig("Selection_Comparison_{}.png".format(dataset))

### HAM10000 Dataset

In [None]:
dataset = 'HAM10000'
base_path = '/home/co-dutt1/rds/hpc-work/Layer-Masking/'
vector_path = '/home/co-dutt1/rds/hpc-work/Layer-Masking/saved_vectors/vit_base/HAM10000/tune_attention_blocks_random_0.0001/'
csv = base_path + 'vit_base/' + dataset + '/tune_attention_blocks_random_vit_base_0.0001.csv'

df = pd.read_csv(csv)
df['Vector Path'] = df['Vector Path'].apply(lambda x: os.path.join(vector_path, x.split('/')[-1]))
df.head()

In [None]:
mean_acc = df['Test Acc@1'].mean()
std_acc = df['Test Acc@1'].std()
max_acc = df['Test Acc@1'].max()
min_acc = df['Test Acc@1'].min()
avg_train_percent = df['Train Percent'].mean()
best_train_percent = df[df['Test Acc@1'] == max_acc]['Train Percent'].values[0]
diff = max_acc - min_acc

print("Mean Acc: ", mean_acc)
print("Std Acc: ", std_acc)
print("Max Acc: ", max_acc)
print("Min Acc: ", min_acc)
print("Avg Train Percent: ", avg_train_percent)
print("Best train percent: ", best_train_percent)
print("Difference (Max, Min): ", diff)

In [None]:
k = 10
best_df = df.sort_values(by=['Test Acc@1'], ascending=False).head(k).reset_index(drop=True)
worst_df = df.sort_values(by=['Test Acc@1'], ascending=True).head(k).reset_index(drop=True)

print(len(best_df), len(worst_df))
worst_df

In [None]:
best_vector = np.array([0]*12)

for i in range(len(best_df)):
    vector_path = best_df['Vector Path'][i]
    vector = np.load(vector_path)
    best_vector += vector

worst_vector = np.array([0]*12)

for i in range(len(worst_df)):
    vector_path = worst_df['Vector Path'][i]
    vector = np.load(vector_path)
    worst_vector += vector

best_vector, worst_vector

### How many times was each block selected?

In [None]:
# See which blocks were activated the maximum number of times

sum_vec = np.array([0]*12)

for i in range(len(df)):
    vec = np.load(df['Vector Path'][i])
    sum_vec += vec

In [None]:
indices = np.arange(len(sum_vec))

plt.figsize=(20, 10)
# Plot the bar graph
plt.bar(indices, sum_vec)

# Add labels and title
plt.xlabel('Block Index')
plt.ylabel('Number of Times Trainable')
plt.xticks(list(range(0,12)))
plt.yticks(list(range(0, max(sum_vec)+1)))
plt.title('Bar Plot of the number of times each attention block was activated (in 50 runs) in a ViT-Base Model.')

# Show the plot
plt.tight_layout()
plt.savefig("Random_Attention_Block_Tuning_{}.png".format(dataset))

In [None]:
indices = np.arange(len(best_vector))

plt.figsize=(20, 10)
# Plot the bar graph
plt.bar(indices, best_vector, label='Best')
plt.bar(indices, worst_vector, label='Worst')
plt.legend()

# Add labels and title
plt.xlabel('Block Index')
plt.ylabel('Block Selection Count')
plt.xticks(list(range(0,12)))
plt.yticks(list(range(0, max(best_vector)+1)))
plt.title('Comparing the attention block selection frequency for best and worst performing vectors.')

# Show the plot
plt.tight_layout()
plt.savefig("Selection_Comparison_{}.png".format(dataset))

### CIFAR10

In [None]:
dataset = 'CIFAR10'
base_path = '/home/co-dutt1/rds/hpc-work/Layer-Masking/'
csv = base_path + 'vit_base/' + dataset + '/tune_attention_blocks_random_vit_base_0.0001.csv'

df = pd.read_csv(csv)
#df['Vector Path'] = df['Vector Path'].apply(lambda x: os.path.join(base_path, x.split('/')[-1]))
df.head()

In [None]:
mean_acc = df['Test Acc@1'].mean()
std_acc = df['Test Acc@1'].std()
max_acc = df['Test Acc@1'].max()
min_acc = df['Test Acc@1'].min()
avg_train_percent = df['Train Percent'].mean()
best_train_percent = df[df['Test Acc@1'] == max_acc]['Train Percent'].values[0]
diff = max_acc - min_acc

print("Mean Acc: ", mean_acc)
print("Std Acc: ", std_acc)
print("Max Acc: ", max_acc)
print("Min Acc: ", min_acc)
print("Avg Train Percent: ", avg_train_percent)
print("Best train percent: ", best_train_percent)
print("Difference (Max, Min): ", diff)

In [None]:
k = 10
best_df = df.sort_values(by=['Test Acc@1'], ascending=False).head(k).reset_index(drop=True)
worst_df = df.sort_values(by=['Test Acc@1'], ascending=True).head(k).reset_index(drop=True)

print(len(best_df), len(worst_df))
worst_df

In [None]:
best_vector = np.array([0]*12)

for i in range(len(best_df)):
    vector_path = best_df['Vector Path'][i]
    vector = np.load(vector_path)
    best_vector += vector

worst_vector = np.array([0]*12)

for i in range(len(worst_df)):
    vector_path = worst_df['Vector Path'][i]
    vector = np.load(vector_path)
    worst_vector += vector

best_vector, worst_vector

### How many times was each block selected?

In [None]:
# See which blocks were activated the maximum number of times

sum_vec = np.array([0]*12)

for i in range(len(df)):
    vec = np.load(df['Vector Path'][i])
    sum_vec += vec

In [None]:
indices = np.arange(len(sum_vec))

plt.figsize=(20, 10)
# Plot the bar graph
plt.bar(indices, sum_vec)

# Add labels and title
plt.xlabel('Block Index')
plt.ylabel('Number of Times Trainable')
plt.xticks(list(range(0,12)))
plt.yticks(list(range(0, max(sum_vec)+1)))
plt.title('Bar Plot of the number of times each attention block was activated (in 50 runs) in a ViT-Base Model.')

# Show the plot
plt.tight_layout()
plt.savefig("Random_Attention_Block_Tuning_{}.png".format(dataset))

In [None]:
indices = np.arange(len(best_vector))

plt.figsize=(20, 10)
# Plot the bar graph
plt.bar(indices, best_vector, label='Best')
plt.bar(indices, worst_vector, label='Worst')
plt.legend()

# Add labels and title
plt.xlabel('Block Index')
plt.ylabel('Block Selection Count')
plt.xticks(list(range(0,12)))
plt.yticks(list(range(0, max(best_vector)+1)))
plt.title('Comparing the attention block selection frequency for best and worst performing vectors.')

# Show the plot
plt.tight_layout()
plt.savefig("Selection_Comparison_{}.png".format(dataset))

### Retinopathy Dataset

In [None]:
dataset = 'retinopathy'
base_path = '/home/co-dutt1/rds/hpc-work/Layer-Masking/'
csv = base_path + 'vit_base/' + dataset + '/tune_attention_blocks_random_vit_base_0.0001.csv'

df = pd.read_csv(csv)
#df['Vector Path'] = df['Vector Path'].apply(lambda x: os.path.join(base_path, x.split('/')[-1]))
df.head()

In [None]:
mean_acc = df['Test Acc@1'].mean()
std_acc = df['Test Acc@1'].std()
max_acc = df['Test Acc@1'].max()
min_acc = df['Test Acc@1'].min()
avg_train_percent = df['Train Percent'].mean()
best_train_percent = df[df['Test Acc@1'] == max_acc]['Train Percent'].values[0]
diff = max_acc - min_acc

print("Mean Acc: ", mean_acc)
print("Std Acc: ", std_acc)
print("Max Acc: ", max_acc)
print("Min Acc: ", min_acc)
print("Avg Train Percent: ", avg_train_percent)
print("Best Train Percent: ", best_train_percent)
print("Difference (Max, Min): ", diff)

In [None]:
k = 10
best_df = df.sort_values(by=['Test Acc@1'], ascending=False).head(k).reset_index(drop=True)
worst_df = df.sort_values(by=['Test Acc@1'], ascending=True).head(k).reset_index(drop=True)

print(len(best_df), len(worst_df))
worst_df

In [None]:
best_vector = np.array([0]*12)

for i in range(len(best_df)):
    vector_path = best_df['Vector Path'][i]
    vector = np.load(vector_path)
    best_vector += vector

worst_vector = np.array([0]*12)

for i in range(len(worst_df)):
    vector_path = worst_df['Vector Path'][i]
    vector = np.load(vector_path)
    worst_vector += vector

best_vector, worst_vector

### How many times was each block selected?

In [None]:
# See which blocks were activated the maximum number of times

sum_vec = np.array([0]*12)

for i in range(len(df)):
    vec = np.load(df['Vector Path'][i])
    sum_vec += vec

In [None]:
indices = np.arange(len(sum_vec))

plt.figsize=(20, 10)
# Plot the bar graph
plt.bar(indices, sum_vec)

# Add labels and title
plt.xlabel('Block Index')
plt.ylabel('Number of Times Trainable')
plt.xticks(list(range(0,12)))
plt.yticks(list(range(0, max(sum_vec)+1)))
plt.title('Bar Plot of the number of times each attention block was activated (in 50 runs) in a ViT-Base Model.')

# Show the plot
plt.tight_layout()
plt.savefig("Random_Attention_Block_Tuning_{}.png".format(dataset))

In [None]:
indices = np.arange(len(best_vector))

plt.figsize=(20, 10)
# Plot the bar graph
plt.bar(indices, best_vector, label='Best')
plt.bar(indices, worst_vector, label='Worst')
plt.legend()

# Add labels and title
plt.xlabel('Block Index')
plt.ylabel('Block Selection Count')
plt.xticks(list(range(0,12)))
plt.yticks(list(range(0, max(best_vector)+1)))
plt.title('Comparing the attention block selection frequency for best and worst performing vectors.')

# Show the plot
plt.tight_layout()
plt.savefig("Selection_Comparison_{}.png".format(dataset))

### CIFAR100

### Pneumonia

In [3]:
def last_row_to_first(df):
    last_row = df.iloc[-1]
    df = pd.concat([last_row.to_frame().T, df], ignore_index=True)
    df = df.drop(df.index[-1]).reset_index(drop=True)

    return df

def get_best_row(df):
    best_row = df.sort_values(by=['Test Acc@1'], ascending=False).head(1).reset_index(drop=True)
    return best_row

In [4]:
base_path = '/home/co-dutt1/rds/hpc-work/Layer-Masking/'

csv_name = 'Fixed_Vectors_tune_attention_blocks_random_vit_base_0.0001.csv'
csv_name2 = 'Fixed_Vectors_tune_attention_vit_base.csv'
model = 'vit_base'

dataset = 'breastUS'
csv = os.path.join(base_path, model, dataset, csv_name)
csv2 = os.path.join(base_path, model, dataset, csv_name2)
df_breastUS = pd.read_csv(csv)
df2 = pd.read_csv(csv2)
best_row = get_best_row(df2)
df_breastUS = df_breastUS.append(best_row, ignore_index=True)
df_breastUS = last_row_to_first(df_breastUS)

dataset = 'fitzpatrick'
csv = os.path.join(base_path, model, dataset, csv_name)
csv2 = os.path.join(base_path, model, dataset, csv_name2)
df_fitzpatrick = pd.read_csv(csv)
df2 = pd.read_csv(csv2)
best_row = get_best_row(df2)
df_fitzpatrick = df_fitzpatrick.append(best_row, ignore_index=True)
df_fitzpatrick = last_row_to_first(df_fitzpatrick)

dataset = 'HAM10000'
csv = os.path.join(base_path, model, dataset, csv_name)
csv2 = os.path.join(base_path, model, dataset, csv_name2)
df_ham10k = pd.read_csv(csv)
df2 = pd.read_csv(csv2)
best_row = get_best_row(df2)
df_ham10k = df_ham10k.append(best_row, ignore_index=True)
df_ham10k = last_row_to_first(df_ham10k)

dataset = 'smdg'
csv = os.path.join(base_path, model, dataset, csv_name)
csv2 = os.path.join(base_path, model, dataset, csv_name2)
df_smdg = pd.read_csv(csv)
df2 = pd.read_csv(csv2)
best_row = get_best_row(df2)
df_smdg = df_smdg.append(best_row, ignore_index=True)
df_smdg = last_row_to_first(df_smdg)

dataset = 'retinopathy'
csv = os.path.join(base_path, model, dataset, csv_name)
csv2 = os.path.join(base_path, model, dataset, csv_name2)
df_retinopathy = pd.read_csv(csv)
df2 = pd.read_csv(csv2)
best_row = get_best_row(df2)
df_retinopathy = df_retinopathy.append(best_row, ignore_index=True)
df_retinopathy = last_row_to_first(df_retinopathy)

dataset = 'CIFAR10'
csv = os.path.join(base_path, model, dataset, csv_name)
csv2 = os.path.join(base_path, model, dataset, csv_name2)
df_CIFAR10 = pd.read_csv(csv)
df2 = pd.read_csv(csv2)
best_row = get_best_row(df2)
df_CIFAR10 = df_CIFAR10.append(best_row, ignore_index=True)
df_CIFAR10 = last_row_to_first(df_CIFAR10)

dataset = 'CIFAR100'
csv = os.path.join(base_path, model, dataset, csv_name)
csv2 = os.path.join(base_path, model, dataset, csv_name2)
df_CIFAR100 = pd.read_csv(csv)
df2 = pd.read_csv(csv2)
best_row = get_best_row(df2)
df_CIFAR100 = df_CIFAR100.append(best_row, ignore_index=True)
df_CIFAR100 = last_row_to_first(df_CIFAR100)

dataset = 'pneumonia'
csv = os.path.join(base_path, model, dataset, csv_name)
csv2 = os.path.join(base_path, model, dataset, csv_name2)
df_pneumonia = pd.read_csv(csv)
df2 = pd.read_csv(csv2)
best_row = get_best_row(df2)
df_pneumonia = df_pneumonia.append(best_row, ignore_index=True)
df_pneumonia = last_row_to_first(df_pneumonia)

df_pneumonia.head()

dataset_dict = {'breastUS': df_breastUS, 'fitzpatrick': df_fitzpatrick, 'HAM10000': df_ham10k, 'smdg': df_smdg, 'retinopathy': df_retinopathy, 'CIFAR10': df_CIFAR10, 'CIFAR100': df_CIFAR100, 'pneumonia': df_pneumonia}

  df_breastUS = df_breastUS.append(best_row, ignore_index=True)
  df_fitzpatrick = df_fitzpatrick.append(best_row, ignore_index=True)
  df_ham10k = df_ham10k.append(best_row, ignore_index=True)
  df_smdg = df_smdg.append(best_row, ignore_index=True)
  df_retinopathy = df_retinopathy.append(best_row, ignore_index=True)
  df_CIFAR10 = df_CIFAR10.append(best_row, ignore_index=True)
  df_CIFAR100 = df_CIFAR100.append(best_row, ignore_index=True)
  df_pneumonia = df_pneumonia.append(best_row, ignore_index=True)


In [5]:
len(df_breastUS), len(df_fitzpatrick), len(df_ham10k), len(df_retinopathy), len(df_CIFAR10), len(df_CIFAR100), len(df_pneumonia), len(df_smdg)

(76, 76, 76, 76, 76, 76, 76, 76)

### How does selective attention fine-tuning compare to full-attention fine-tuning?

In [6]:
best_performance = []
full_attention = []

for dataset in dataset_dict.keys():
    df = dataset_dict[dataset]
    print("Dataset: ", dataset)
    print("Best Performing Method: ", df[df['Test Acc@1'] == df['Test Acc@1'].max()]['Tuning Method'].values[0])
    print("Best Test Acc: ", df['Test Acc@1'].max())
    print("Full Attention FT: ", df[df['Tuning Method'] == 'tune_attention']['Test Acc@1'].values[0])
    print("Difference: ", df['Test Acc@1'].max() - df[df['Tuning Method'] == 'tune_attention']['Test Acc@1'].values[0])
    print("\n")
    best_performance.append(df['Test Acc@1'].max())
    full_attention.append(df[df['Tuning Method'] == 'tune_attention']['Test Acc@1'].values[0])
    

Dataset:  breastUS
Best Performing Method:  tune_attention_blocks_random
Best Test Acc:  94.80518846387989
Full Attention FT:  89.51077922077921
Difference:  5.294409243100674


Dataset:  fitzpatrick
Best Performing Method:  tune_attention_blocks_random
Best Test Acc:  84.94690818238601
Full Attention FT:  80.82299812617114
Difference:  4.123910056214868


Dataset:  HAM10000
Best Performing Method:  tune_attention_blocks_random
Best Test Acc:  91.70829170829172
Full Attention FT:  90.4095904095904
Difference:  1.2987012987013316


Dataset:  smdg
Best Performing Method:  tune_attention_blocks_random
Best Test Acc:  90.0974025974026
Full Attention FT:  89.12337662337663
Difference:  0.9740259740259773


Dataset:  retinopathy
Best Performing Method:  tune_attention_blocks_random
Best Test Acc:  77.39179954441913
Full Attention FT:  72.89293849658314
Difference:  4.498861047835987


Dataset:  CIFAR10
Best Performing Method:  tune_attention_blocks_random
Best Test Acc:  99.03
Full Attention

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Set the width of the bars
bar_width = 0.35

# Calculate the x-axis positions for the bars
x = np.arange(len(dataset_dict.keys()))
y = np.arange(0, 101, 5)

# Create the figure and axis objects
fig, ax = plt.subplots(figsize=(10, 12))

# Difference between the two bars
diff = np.array(best_performance) - np.array(full_attention)

# Plot the bars
ax.bar(x - bar_width/2, best_performance, width=bar_width, label='Selective Attention')
ax.bar(x + bar_width/2, full_attention, width=bar_width, label='Full Attention')

# Set the x-axis tick positions and labels
ax.set_xticks(x)
ax.set_yticks(y)
ax.set_xticklabels(list(dataset_dict.keys()))
ax.set_yticklabels(list(range(0, 101, 5)))
ax.set_xlabel('Dataset')
ax.set_ylabel('Test Accuracy')

# Set the legend
ax.legend()

# Display the plot
#plt.show()

plt.savefig('../plots/selective_vs_full_attention.png')


### Comparing Block Selection b/w best and worst performing vectors

In [None]:
# Sort all the dataframes

k = 10

all_datasets = [df_breastUS, df_fitzpatrick, df_smdg, df_ham10k, df_retinopathy, df_CIFAR10, df_CIFAR100, df_pneumonia]
_best = np.array([0]*12)
_worst = np.array([0]*12)

for df in all_datasets:
    best_vector, worst_vector = create_best_worst_vectors(df)
    _best += best_vector
    _worst += worst_vector

_best

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Set the width of the bars
bar_width = 0.35

# Calculate the x-axis positions for the bars
x = np.arange(len(_best))

# Create the figure and axis objects
fig, ax = plt.subplots(figsize=(10, 12))

# Plot the bars
ax.bar(x - bar_width/2, _best, width=bar_width, label='Best')
ax.bar(x + bar_width/2, _worst, width=bar_width, label='Worst')

# Set the x-axis tick positions and labels
ax.set_xticks(x)
ax.set_xticklabels(list(range(0,12)))
ax.set_xlabel('Block Index')
ax.set_ylabel('Block Selection Count')

# Set the legend
ax.legend()

# Display the plot
#plt.show()

plt.savefig('../plots/best_worst_blocks_selection_supervised_vitB.png')


## Ranking the vectors for each dataset

#### Do we have common vectors for best (and worst) performance?

In [None]:
k = 10

best_df_breastUS = df_breastUS.sort_values(by=['Test Acc@1'], ascending=False).head(k).reset_index(drop=True)
worst_df_breastUS = df_breastUS.sort_values(by=['Test Acc@1'], ascending=True).head(k).reset_index(drop=True)
best_df_breastUS['Vector Index'] =  best_df_breastUS['Vector Path'].apply(lambda x: x.split('/')[-1].split('_')[-1].strip('.npy')).astype(int).astype(int)
worst_df_breastUS['Vector Index'] =  worst_df_breastUS['Vector Path'].apply(lambda x: x.split('/')[-1].split('_')[-1].strip('.npy')).astype(int).astype(int)

best_df_fitzpatrick = df_fitzpatrick.sort_values(by=['Test Acc@1'], ascending=False).head(k).reset_index(drop=True)
worst_df_fitzpatrick = df_fitzpatrick.sort_values(by=['Test Acc@1'], ascending=True).head(k).reset_index(drop=True)
best_df_fitzpatrick['Vector Index'] =  best_df_fitzpatrick['Vector Path'].apply(lambda x: x.split('/')[-1].split('_')[-1].strip('.npy')).astype(int)
worst_df_fitzpatrick['Vector Index'] =  worst_df_fitzpatrick['Vector Path'].apply(lambda x: x.split('/')[-1].split('_')[-1].strip('.npy')).astype(int)

best_df_ham10k = df_ham10k.sort_values(by=['Test Acc@1'], ascending=False).head(k).reset_index(drop=True)
worst_df_ham10k = df_ham10k.sort_values(by=['Test Acc@1'], ascending=True).head(k).reset_index(drop=True)
best_df_ham10k['Vector Index'] =  best_df_ham10k['Vector Path'].apply(lambda x: x.split('/')[-1].split('_')[-1].strip('.npy')).astype(int)
worst_df_ham10k['Vector Index'] =  worst_df_ham10k['Vector Path'].apply(lambda x: x.split('/')[-1].split('_')[-1].strip('.npy')).astype(int)

best_df_retinopathy = df_retinopathy.sort_values(by=['Test Acc@1'], ascending=False).head(k).reset_index(drop=True)
worst_df_retinopathy = df_retinopathy.sort_values(by=['Test Acc@1'], ascending=True).head(k).reset_index(drop=True)
best_df_retinopathy['Vector Index'] =  best_df_retinopathy['Vector Path'].apply(lambda x: x.split('/')[-1].split('_')[-1].strip('.npy')).astype(int)
worst_df_retinopathy['Vector Index'] =  worst_df_retinopathy['Vector Path'].apply(lambda x: x.split('/')[-1].split('_')[-1].strip('.npy')).astype(int)

best_df_CIFAR10 = df_CIFAR10.sort_values(by=['Test Acc@1'], ascending=False).head(k).reset_index(drop=True)
worst_df_CIFAR10 = df_CIFAR10.sort_values(by=['Test Acc@1'], ascending=True).head(k).reset_index(drop=True)
best_df_CIFAR10['Vector Index'] =  best_df_CIFAR10['Vector Path'].apply(lambda x: x.split('/')[-1].split('_')[-1].strip('.npy')).astype(int)
worst_df_CIFAR10['Vector Index'] =  worst_df_CIFAR10['Vector Path'].apply(lambda x: x.split('/')[-1].split('_')[-1].strip('.npy')).astype(int)

best_df_CIFAR100 = df_CIFAR100.sort_values(by=['Test Acc@1'], ascending=False).head(k).reset_index(drop=True)
worst_df_CIFAR100 = df_CIFAR100.sort_values(by=['Test Acc@1'], ascending=True).head(k).reset_index(drop=True)
best_df_CIFAR100['Vector Index'] =  best_df_CIFAR100['Vector Path'].apply(lambda x: x.split('/')[-1].split('_')[-1].strip('.npy')).astype(int)
worst_df_CIFAR100['Vector Index'] =  worst_df_CIFAR100['Vector Path'].apply(lambda x: x.split('/')[-1].split('_')[-1].strip('.npy')).astype(int)

best_df_pneumonia = df_pneumonia.sort_values(by=['Test Acc@1'], ascending=False).head(k).reset_index(drop=True)
worst_df_pneumonia = df_pneumonia.sort_values(by=['Test Acc@1'], ascending=True).head(k).reset_index(drop=True)
best_df_pneumonia['Vector Index'] =  best_df_pneumonia['Vector Path'].apply(lambda x: x.split('/')[-1].split('_')[-1].strip('.npy')).astype(int)
worst_df_pneumonia['Vector Index'] =  worst_df_pneumonia['Vector Path'].apply(lambda x: x.split('/')[-1].split('_')[-1].strip('.npy')).astype(int)

best_df_smdg = df_smdg.sort_values(by=['Test Acc@1'], ascending=False).head(k).reset_index(drop=True)
worst_df_smdg = df_smdg.sort_values(by=['Test Acc@1'], ascending=True).head(k).reset_index(drop=True)
best_df_smdg['Vector Index'] =  best_df_smdg['Vector Path'].apply(lambda x: x.split('/')[-1].split('_')[-1].strip('.npy')).astype(int)
worst_df_smdg['Vector Index'] =  worst_df_smdg['Vector Path'].apply(lambda x: x.split('/')[-1].split('_')[-1].strip('.npy')).astype(int)

In [None]:
set_breastUS_best = set(best_df_breastUS['Vector Index'])
set_fitzpatrick_best = set(best_df_fitzpatrick['Vector Index'])
set_ham10k_best = set(best_df_ham10k['Vector Index'])
set_retinopathy_best = set(best_df_retinopathy['Vector Index'])
set_CIFAR10_best = set(best_df_CIFAR10['Vector Index'])
set_CIFAR100_best = set(best_df_CIFAR100['Vector Index'])
set_pneumonia_best = set(best_df_pneumonia['Vector Index'])
set_smdg_best = set(best_df_smdg['Vector Index'])

set_breastUS_worst = set(worst_df_breastUS['Vector Index'])
set_fitzpatrick_worst = set(worst_df_fitzpatrick['Vector Index'])
set_ham10k_worst = set(worst_df_ham10k['Vector Index'])
set_retinopathy_worst = set(worst_df_retinopathy['Vector Index'])
set_CIFAR10_worst = set(worst_df_CIFAR10['Vector Index'])
set_CIFAR100_worst = set(worst_df_CIFAR100['Vector Index'])
set_pneumonia_worst = set(worst_df_pneumonia['Vector Index'])
set_smdg_worst = set(worst_df_smdg['Vector Index'])

In [None]:
# Check if there is any overlap between the best and worst vectors

common_best_vector = set.intersection(set_breastUS_best, set_fitzpatrick_best, set_ham10k_best, set_retinopathy_best, set_CIFAR10_best, set_CIFAR100_best, set_pneumonia_best, set_smdg_best)
print("Common Best Vector: ", common_best_vector)  

common_worst_vector = set.intersection(set_breastUS_worst, set_fitzpatrick_worst, set_ham10k_worst, set_retinopathy_worst, set_CIFAR10_worst, set_CIFAR100_worst, set_pneumonia_worst, set_smdg_worst)
print("Common Worst Vector: ", common_worst_vector)

NOTE: There is no common vector between the best and worst vectors

### How sensitive is the final performance to the block activation?

In [None]:
for dataset in dataset_dict:
    df = dataset_dict[dataset]
    max_acc = df['Test Acc@1'].max()
    min_acc = df['Test Acc@1'].min()
    mean = df['Test Acc@1'].mean()
    std = df['Test Acc@1'].std()
    difference = max_acc - min_acc

    print("Max Acc {}: {}".format(dataset, max_acc))
    print("Min Acc {}: {}".format(dataset, min_acc))
    print("Difference {}: {}".format(dataset, difference))
    print("Mean {}: {}".format(dataset, mean))
    print("Std {}: {}".format(dataset, std))
    print("\n")

### How do the vectors of best and worst performance look like for each dataset?


In [6]:
for dataset in dataset_dict:
    df = dataset_dict[dataset]
    max_acc = df['Test Acc@1'].max()
    min_acc = df['Test Acc@1'].min()

    best_vector = np.load(df[df['Test Acc@1'] == max_acc]['Vector Path'].values[0])
    worst_vector = np.load(df[df['Test Acc@1'] == min_acc]['Vector Path'].values[0])
    cos_dist = distance.cosine(best_vector, worst_vector)
    cos_similarity = 1 - cos_dist
    hamming_dist = distance.hamming(best_vector, worst_vector)

    print("Dataset: ", dataset)
    print("Best Vector: ", best_vector)
    print("Worst Vector: ", worst_vector)
    print("Cosine Similarity: ", cos_similarity)
    print("Hamming Distance: ", hamming_dist)
    print("Performance Difference: ", max_acc - min_acc)
    print("\n")

Dataset:  breastUS
Best Vector:  [0 1 0 1 1 0 1 1 0 0 1 1]
Worst Vector:  [0 0 1 0 1 1 0 0 0 1 0 0]
Cosine Similarity:  0.18898223650461365
Hamming Distance:  0.75
Performance Difference:  15.584409243100666


Dataset:  fitzpatrick
Best Vector:  [0 1 1 0 1 1 0 1 1 0 1 1]
Worst Vector:  [1 1 1 0 1 0 1 1 1 1 1 0]
Cosine Similarity:  0.7071067811865475
Hamming Distance:  0.4166666666666667
Performance Difference:  14.553404122423487


Dataset:  HAM10000
Best Vector:  [1 1 0 0 1 0 1 1 0 1 1 1]
Worst Vector:  [0 1 0 0 0 1 0 0 0 0 0 0]
Cosine Similarity:  0.25
Hamming Distance:  0.6666666666666666
Performance Difference:  5.994005994006017


Dataset:  smdg
Best Vector:  [1 0 0 1 1 1 1 1 1 0 0 0]
Worst Vector:  [1 0 0 0 1 1 0 1 1 0 0 1]
Cosine Similarity:  0.7715167498104595
Hamming Distance:  0.25
Performance Difference:  6.57467532467534


Dataset:  retinopathy
Best Vector:  [0 0 1 1 1 0 0 1 0 0 1 0]
Worst Vector:  [1 1 1 0 1 0 1 1 1 1 1 0]
Cosine Similarity:  0.5962847939999438
Hamming Dis

### Plotting Cosine Similarity b/w Best and Worst vectors respectively for each dataset

In [None]:
max_acc_breastUS = df_breastUS['Test Acc@1'].max()
min_acc_breastUS = df_breastUS['Test Acc@1'].min()

max_acc_fitzpatrick = df_fitzpatrick['Test Acc@1'].max()
min_acc_fitzpatrick = df_fitzpatrick['Test Acc@1'].min()

max_acc_ham10k = df_ham10k['Test Acc@1'].max()
min_acc_ham10k = df_ham10k['Test Acc@1'].min()

max_acc_retinopathy = df_retinopathy['Test Acc@1'].max()
min_acc_retinopathy = df_retinopathy['Test Acc@1'].min()

max_acc_CIFAR10 = df_CIFAR10['Test Acc@1'].max()
min_acc_CIFAR10 = df_CIFAR10['Test Acc@1'].min()

max_acc_CIFAR100 = df_CIFAR100['Test Acc@1'].max()
min_acc_CIFAR100 = df_CIFAR100['Test Acc@1'].min()

max_acc_pneumonia = df_pneumonia['Test Acc@1'].max()
min_acc_pneumonia = df_pneumonia['Test Acc@1'].min()

max_acc_smdg = df_smdg['Test Acc@1'].max()
min_acc_smdg = df_smdg['Test Acc@1'].min()

In [None]:
best_vector_breastUS = np.load(df_breastUS[df_breastUS['Test Acc@1'] == max_acc_breastUS]['Vector Path'].values[0])
best_vector_fitzpatrick = np.load(df_fitzpatrick[df_fitzpatrick['Test Acc@1'] == max_acc_fitzpatrick]['Vector Path'].values[0])
best_vector_ham10k = np.load(df_ham10k[df_ham10k['Test Acc@1'] == max_acc_ham10k]['Vector Path'].values[0])
best_vector_smdg = np.load(df_smdg[df_smdg['Test Acc@1'] == max_acc_smdg]['Vector Path'].values[0])
best_vector_retinopathy = np.load(df_retinopathy[df_retinopathy['Test Acc@1'] == max_acc_retinopathy]['Vector Path'].values[0])
best_vector_CIFAR10 = np.load(df_CIFAR10[df_CIFAR10['Test Acc@1'] == max_acc_CIFAR10]['Vector Path'].values[0])
best_vector_CIFAR100 = np.load(df_CIFAR100[df_CIFAR100['Test Acc@1'] == max_acc_CIFAR100]['Vector Path'].values[0])
best_vector_pneumonia = np.load(df_pneumonia[df_pneumonia['Test Acc@1'] == max_acc_pneumonia]['Vector Path'].values[0])

worst_vector_breastUS = np.load(df_breastUS[df_breastUS['Test Acc@1'] == min_acc_breastUS]['Vector Path'].values[0])
worst_vector_fitzpatrick = np.load(df_fitzpatrick[df_fitzpatrick['Test Acc@1'] == min_acc_fitzpatrick]['Vector Path'].values[0])
worst_vector_ham10k = np.load(df_ham10k[df_ham10k['Test Acc@1'] == min_acc_ham10k]['Vector Path'].values[0])
worst_vector_smdg = np.load(df_smdg[df_smdg['Test Acc@1'] == min_acc_smdg]['Vector Path'].values[0])
worst_vector_retinopathy = np.load(df_retinopathy[df_retinopathy['Test Acc@1'] == min_acc_retinopathy]['Vector Path'].values[0])
worst_vector_CIFAR10 = np.load(df_CIFAR10[df_CIFAR10['Test Acc@1'] == min_acc_CIFAR10]['Vector Path'].values[0])
worst_vector_CIFAR100 = np.load(df_CIFAR100[df_CIFAR100['Test Acc@1'] == min_acc_CIFAR100]['Vector Path'].values[0])
worst_vector_pneumonia = np.load(df_pneumonia[df_pneumonia['Test Acc@1'] == min_acc_pneumonia]['Vector Path'].values[0])

all_best_vectors = [best_vector_breastUS, best_vector_fitzpatrick, best_vector_ham10k, best_vector_smdg, best_vector_retinopathy, best_vector_CIFAR10, best_vector_CIFAR100, best_vector_pneumonia]
all_worst_vectors = [worst_vector_breastUS, worst_vector_fitzpatrick, worst_vector_ham10k, worst_vector_smdg, worst_vector_retinopathy, worst_vector_CIFAR10, worst_vector_CIFAR100, worst_vector_pneumonia]

best_vectors = []
for vector in all_best_vectors:
    best_vectors.append(vector)

worst_vectors = []
for vector in all_worst_vectors:
    worst_vectors.append(vector)

In [None]:
num_vectors = len(best_vectors)
similarity_matrix = np.zeros((num_vectors, num_vectors))

for i in range(num_vectors):
    for j in range(num_vectors):
        cos_dist = distance.cosine(best_vectors[i], best_vectors[j])
        cos_similarity = 1 - cos_dist
        similarity_matrix[i, j] = cos_similarity
        #print("Cosine Sim b/w vector {} and {}: ".format(str(i), str(j)), cos_similarity)

similarity_df_best = pd.DataFrame(similarity_matrix)

similarity_matrix = np.zeros((num_vectors, num_vectors))
for i in range(num_vectors):
    for j in range(num_vectors):
        cos_dist = distance.cosine(worst_vectors[i], worst_vectors[j])
        cos_similarity = 1 - cos_dist
        similarity_matrix[i, j] = cos_similarity
        #print("Cosine Sim b/w vector {} and {}: ".format(str(i), str(j)), cos_similarity)

similarity_df_worst = pd.DataFrame(similarity_matrix)

In [None]:
# Create a confusion matrix using seaborn and save it as a figure
tuning_method = 'tune_attention_blocks_random'
top_k = 10
datasets = ['BreastUS', 'Fitzpatrick', 'HAM10000', 'SMDG', 'Retinopathy', 'CIFAR10', 'CIFAR100', 'Pneumonia']
pos = [0.5,1.5,2.5,3.5,4.5,5.5,6.5,7.5]

plt.figure(figsize=(10, 10))

sns.heatmap(similarity_df_best, annot=True, fmt='.2f', cmap='coolwarm')
plt.xlabel("Datasets")
plt.ylabel("Datasets")
plt.xticks(pos, datasets)
plt.yticks(pos, datasets)
plt.title("Cos-Sim b/w best performing vectors")
plt.savefig('../plots/best_cosine_sim_{}.png'.format(tuning_method))

In [None]:
# Create a confusion matrix using seaborn and save it as a figure
tuning_method = 'tune_attention_blocks_random'
top_k = 10
datasets = ['BreastUS', 'Fitzpatrick', 'HAM10000', 'SMDG', 'Retinopathy', 'CIFAR10', 'CIFAR100', 'Pneumonia']
pos = [0.5,1.5,2.5,3.5,4.5,5.5,6.5,7.5]

plt.figure(figsize=(10, 10))

sns.heatmap(similarity_df_worst, annot=True, fmt='.2f', cmap='coolwarm')
plt.xlabel("Vectors")
plt.ylabel("Vectors")
plt.xticks(pos, datasets)
plt.yticks(pos, datasets)
plt.title("Cos-Sim b/w worst performing vectors")
plt.savefig('../plots/worst_cosine_sim_{}.png'.format(tuning_method))

### Ranking each vector for all the datasets

In [None]:
base_path = '/home/co-dutt1/rds/hpc-work/Layer-Masking/'

csv_name = 'Fixed_Vectors_tune_attention_blocks_random_vit_base_0.0001.csv'
csv_name2 = 'Fixed_Vectors_tune_attention_vit_base.csv'
model = 'vit_base'

dataset = 'breastUS'
csv = os.path.join(base_path, model, dataset, csv_name)
df_breastUS = pd.read_csv(csv)

dataset = 'fitzpatrick'
csv = os.path.join(base_path, model, dataset, csv_name)
df_fitzpatrick = pd.read_csv(csv)

dataset = 'HAM10000'
csv = os.path.join(base_path, model, dataset, csv_name)
df_ham10k = pd.read_csv(csv)

dataset = 'smdg'
csv = os.path.join(base_path, model, dataset, csv_name)
df_smdg = pd.read_csv(csv)

dataset = 'retinopathy'
csv = os.path.join(base_path, model, dataset, csv_name)
df_retinopathy = pd.read_csv(csv)

dataset = 'CIFAR10'
csv = os.path.join(base_path, model, dataset, csv_name)
df_CIFAR10 = pd.read_csv(csv)

dataset = 'CIFAR100'
csv = os.path.join(base_path, model, dataset, csv_name)
df_CIFAR100 = pd.read_csv(csv)

dataset = 'pneumonia'
csv = os.path.join(base_path, model, dataset, csv_name)
df_pneumonia = pd.read_csv(csv)

df_pneumonia.head()

dataset_dict = {'breastUS': df_breastUS, 'fitzpatrick': df_fitzpatrick, 'HAM10000': df_ham10k, 'smdg': df_smdg, 'retinopathy': df_retinopathy, 'CIFAR10': df_CIFAR10, 'CIFAR100': df_CIFAR100, 'pneumonia': df_pneumonia}

In [None]:
merged_df = pd.DataFrame()

merged_df['Tuning Method'] = df_breastUS['Tuning Method']
merged_df['Train Percent'] = df_breastUS['Train Percent']
merged_df['Vector'] = df_breastUS['Vector Path'].apply(lambda x: x.split('/')[-1])
merged_df['Combined Rank'] = 0


for dataset in dataset_dict.keys():
    df = dataset_dict[dataset]
    rank_col = dataset + '_rank'
    acc_col = dataset + '_acc'
    merged_df[rank_col] = df['Test Acc@1'].rank(ascending=False)
    #merged_df[acc_col] = df['Test Acc@1']

for i in range(len(merged_df)):
    merged_df['Combined Rank'][i] = merged_df.iloc[i, 3:11].sum()/8

merged_df.head()

In [None]:
k = 10
merged_df_best = merged_df.sort_values(by=['Combined Rank'], ascending=True).head(k).reset_index(drop=True) #Smaller the rank the better
merged_df_worst = merged_df.sort_values(by=['Combined Rank'], ascending=False).head(k).reset_index(drop=True)

merged_df_best.head()