In [4]:
import os
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import timm
import torch
import torch.nn as nn

from scipy.spatial import distance
import seaborn as sns


In [11]:
def disable_module(module):
    """Freeze ``module``: set ``requires_grad = False`` on each of its parameters."""
    for param in module.parameters():
        param.requires_grad = False
        
def enable_module(module):
    """Unfreeze ``module``: set ``requires_grad = True`` on each of its parameters."""
    for param in module.parameters():
        param.requires_grad = True


def check_tunable_params(model, verbose=True):
    """
    Count the trainable and total parameters of ``model`` and print a summary.

    Args:
        model: object exposing ``named_parameters()``; each parameter must
            provide ``numel()`` and a ``requires_grad`` flag.
        verbose: when true, the name of every trainable parameter is printed.

    Returns:
        Tuple ``(trainable_params, all_param)`` of element counts.
    """
    total_count = 0
    tunable_count = 0

    for param_name, tensor in model.named_parameters():
        size = tensor.numel()
        total_count += size
        if not tensor.requires_grad:
            continue
        if verbose:
            print(param_name)
        tunable_count += size

    # One-line summary: absolute counts plus the trainable share in percent.
    print(
        f"trainable params: {tunable_count} || all params: {total_count} || trainable%: {100 * tunable_count / total_count:.5f}"
    )

    return tunable_count, total_count

def create_mapping(model, vector):
    """
    Pair attention parameters with consecutive entries of ``vector``.

    A parameter counts as an attention parameter when its name contains
    ``'.attn.'`` or ``'attention'``. Every other parameter gets
    ``requires_grad = False`` as a side effect.

    Returns:
        Dict mapping each attention parameter name to its ``vector`` entry,
        in ``model.named_parameters()`` order.
    """
    mapping = {}
    cursor = 0  # position of the next unused entry of `vector`

    for param_name, param in model.named_parameters():
        is_attention = '.attn.' in param_name or 'attention' in param_name
        if not is_attention:
            param.requires_grad = False
            continue
        mapping[param_name] = vector[cursor]
        cursor += 1

    return mapping

def sort_dict(dict, descending=False):
    """
    Return a new dictionary whose items are ordered by value.

    Args:
        dict: mapping to sort (the name is kept for backward compatibility
            with keyword callers).
        descending: sort from largest to smallest value when true.

    Returns:
        A new ``dict`` with the same items in sorted-by-value order.
    """
    # The parameter named `dict` shadows the builtin inside this function, so
    # calling dict(...) here would call the argument and raise TypeError.
    # A comprehension builds the result without needing the builtin name.
    ordered_items = sorted(dict.items(), key=lambda item: item[1], reverse=descending)

    return {key: value for key, value in ordered_items}

def get_modules_from_vector(vector, model):
    """
    Split the positions of ``vector`` into trainable (== 1) and frozen (== 0).

    ``model`` is accepted but not used.

    Returns:
        ``(trainable_blocks, frozen_blocks)``, each being the tuple returned
        by ``np.where`` for the respective condition.
    """
    bits = np.array(vector)

    return np.where(bits == 1), np.where(bits == 0)

def get_model_for_bitfit(model):
    """
    Freeze ``model`` except for parameters whose name contains one of the
    trainable component substrings (``'bias'``, ``'pooler.dense.bias'``,
    ``'head'``).

    Returns:
        A list holding one ``1`` per parameter that was re-enabled
        (parameters left frozen contribute no entry).
    """
    trainable_components = ('bias', 'pooler.dense.bias', 'head')

    # Disable all the gradients first.
    for frozen in model.parameters():
        frozen.requires_grad = False

    vector = []

    # Re-enable every parameter whose name matches a trainable component.
    for param_name, param in model.named_parameters():
        if any(component in param_name for component in trainable_components):
            vector.append(1)
            param.requires_grad = True

    return vector

def enable_from_vector(vector, model):
    """
    Make the attention of block ``i`` trainable exactly when ``vector[i] == 1``.

    The whole model is frozen first, so afterwards only the selected
    ``block.attn`` modules have trainable parameters. Prints the vector and
    one line per enabled block.
    """
    print("Vector: ", vector)

    # Start from a fully frozen model.
    disable_module(model)

    for idx, block in enumerate(model.blocks):
        if vector[idx] != 1:
            disable_module(block.attn)
            continue

        print("Enabling attention in Block {}".format(idx))
        enable_module(block.attn)

def create_best_worst_vectors(df, k=10):
    """
    Sum the saved vectors of the ``k`` best and the ``k`` worst runs.

    Args:
        df: DataFrame with a ``'Test Acc@1'`` column and a ``'Vector Path'``
            column holding paths readable by ``np.load``.
        k: number of runs taken from each end of the accuracy ranking.

    Returns:
        ``(best_vector, worst_vector)``: element-wise sums of the loaded
        vectors, each accumulated into a length-12 integer array.
    """
    def _sum_vectors(paths):
        # Accumulate in place into a length-12 integer array.
        total = np.array([0] * 12)
        for path in paths:
            total += np.load(path)
        return total

    top_runs = df.sort_values(by=['Test Acc@1'], ascending=False).head(k)
    bottom_runs = df.sort_values(by=['Test Acc@1'], ascending=True).head(k)

    best_vector = _sum_vectors(top_runs['Vector Path'])
    worst_vector = _sum_vectors(bottom_runs['Vector Path'])

    return best_vector, worst_vector

def tune_blocks_random(model, mask, segment):
    """
    Enable or disable one segment of every block in ``model.blocks``.

    Args:
        model: object with a ``blocks`` sequence; blocks need ``attn`` (for
            ``segment='attention'``) or ``norm1``/``norm2`` (for
            ``segment='layernorm'``).
        mask: per-block 0/1 sequence (list or ndarray), or ``None`` to draw
            each bit at random with probability 0.5.
        segment: ``'attention'`` or ``'layernorm'``. Any other value only
            records the bits and changes no module.

    Returns:
        List of 0/1 ints, one per block, with 1 meaning "enabled".

    Raises:
        AssertionError: if a mask was given and the applied bits differ from it.
    """
    vector = []

    for idx, block in enumerate(model.blocks):

        if mask is None:
            bit = int(np.random.random(1)[0] > 0.5)
        else:
            bit = mask[idx]

        if bit == 1:
            print("Enabling {} in Block {}".format(segment, idx))
            if segment == 'attention':
                enable_module(block.attn)
            elif segment == 'layernorm':
                enable_module(block.norm1)
                enable_module(block.norm2)

            vector.append(1)
        else:
            print("Disabling {} in Block {}".format(segment, idx))
            if segment == 'attention':
                disable_module(block.attn)
            elif segment == 'layernorm':
                disable_module(block.norm1)
                disable_module(block.norm2)

            vector.append(0)

    if mask is not None:
        # `mask == vector` is element-wise for an ndarray mask, and asserting
        # on a multi-element array raises ValueError. np.array_equal yields a
        # single bool for lists and arrays alike.
        assert np.array_equal(mask, vector)

    return vector

In [None]:
# Build a ViT-Base without pretrained weights and draw one random 0/1 entry per block.
model = timm.create_model('vit_base_patch16_224', pretrained=False)
num_blocks = len(model.blocks)
mask = list(np.random.randint(0, 2, size=num_blocks))

# Freeze everything, then enable attention only in the blocks selected by the mask.
disable_module(model)
vector = tune_blocks_random(model, mask, segment='attention')

print("Vector: ", vector)
print("Mask: ", mask)

check_tunable_params(model, verbose=True)

In [None]:
path = "/home/co-dutt1/rds/hpc-work/Layer-Masking/Experiment_Vectors/"

# Write 50 random 0/1 vectors (one entry per block) as random_vector_1.npy ... random_vector_50.npy.
# Re-running this cell draws new vectors and writes them to the same file names.
for run_id in range(1, 51):
    vector = np.random.randint(0, 2, size=num_blocks)
    np.save(f"{path}random_vector_{run_id}.npy", vector)

# Random Attention Tuning (Block Level)

### BreastUS Dataset

In [None]:
base_path = '/home/co-dutt1/rds/hpc-work/Layer-Masking/'
# Results CSV of the random attention-block tuning runs on BreastUS.
csv = f"{base_path}vit_base/breastUS/tune_attention_blocks_random_vit_base_0.0001.csv"

# 'Vector Path' is used exactly as stored in the CSV.
df = pd.read_csv(csv)
df.head()

In [None]:
# Summary statistics of the test accuracy over all runs.
acc = df['Test Acc@1']
mean_acc, std_acc = acc.mean(), acc.std()
max_acc, min_acc = acc.max(), acc.min()
avg_train_percent = df['Train Percent'].mean()
diff = max_acc - min_acc
# Train percentage of the first run that reached the maximum accuracy.
best_train_percent = df.loc[acc == max_acc, 'Train Percent'].values[0]

for label, value in [
    ("Mean Acc: ", mean_acc),
    ("Std Acc: ", std_acc),
    ("Max Acc: ", max_acc),
    ("Min Acc: ", min_acc),
    ("Avg Train Percent: ", avg_train_percent),
    ("Best Performance Train Percent: ", best_train_percent),
    ("Diff: ", diff),
]:
    print(label, value)

In [None]:
# Rank the runs by test accuracy and keep the k best and the k worst.
k = 10
best_df = df.sort_values('Test Acc@1', ascending=False).iloc[:k].reset_index(drop=True)
worst_df = df.sort_values('Test Acc@1', ascending=True).iloc[:k].reset_index(drop=True)

print(len(best_df), len(worst_df))
best_df

In [None]:
# Per-block selection counts among the k best and the k worst runs.
# This reuses create_best_worst_vectors, which applies the same ranking
# (sort by 'Test Acc@1', take k rows) and summation as the previous
# copy-pasted loops.
best_vector, worst_vector = create_best_worst_vectors(df, k=k)

best_vector, worst_vector

### How many times was each block trained during 50 runs?

In [None]:
# Count, per block, in how many runs its entry in the saved vector was set.
sum_vec = np.array([0] * 12)

for saved_path in df['Vector Path']:
    sum_vec += np.load(saved_path)

In [None]:
indices = np.arange(len(sum_vec))

# `plt.figsize = ...` only sets an unused attribute on the pyplot module;
# the size has to be passed when the figure is created.
plt.figure(figsize=(20, 10))
plt.bar(indices, sum_vec)

# Labels, integer ticks and title
plt.xlabel('Block Index')
plt.ylabel('Number of Times Trainable')
plt.xticks(indices)
plt.yticks(np.arange(0, max(sum_vec) + 1))
plt.title('Bar Plot of the number of times each attention block was activated (in 50 runs) in a ViT-Base Model.')

# Save the figure
plt.tight_layout()
plt.savefig("Random_Attention_Block_Tuning_breastUS.png")

In [None]:
indices = np.arange(len(best_vector))
bar_width = 0.4

# `plt.figsize = ...` only sets an unused attribute on the pyplot module;
# the size has to be passed when the figure is created.
plt.figure(figsize=(20, 10))
# Draw the two series side by side; at identical x positions the second
# series would cover the first wherever it is at least as tall.
plt.bar(indices - bar_width / 2, best_vector, width=bar_width, label='Best')
plt.bar(indices + bar_width / 2, worst_vector, width=bar_width, label='Worst')
plt.legend()

# Labels, integer ticks (covering both series) and title
plt.xlabel('Block Index')
plt.ylabel('Block Selection Count')
plt.xticks(indices)
plt.yticks(np.arange(0, max(max(best_vector), max(worst_vector)) + 1))
plt.title('Comparing the attention block selection frequency for best and worst performing vectors.')

# Save the figure
plt.tight_layout()
plt.savefig("Selection_Comparison_BreastUS.png")

We see that the best performing vectors tune later blocks more than the worst performing vectors.

### FitzPatrick Dataset

In [None]:
dataset = 'fitzpatrick'
base_path = '/home/co-dutt1/rds/hpc-work/Layer-Masking/'
# Results CSV of the random attention-block tuning runs on this dataset.
csv = f"{base_path}vit_base/{dataset}/tune_attention_blocks_random_vit_base_0.0001.csv"

# 'Vector Path' is used exactly as stored in the CSV.
df = pd.read_csv(csv)
df.head()

In [None]:
# Summary statistics of the test accuracy over all runs.
acc = df['Test Acc@1']
mean_acc, std_acc = acc.mean(), acc.std()
max_acc, min_acc = acc.max(), acc.min()
avg_train_percent = df['Train Percent'].mean()
# Train percentage of the first run that reached the maximum accuracy.
best_train_percent = df.loc[acc == max_acc, 'Train Percent'].values[0]
diff = max_acc - min_acc

for label, value in [
    ("Mean Acc: ", mean_acc),
    ("Std Acc: ", std_acc),
    ("Max Acc: ", max_acc),
    ("Min Acc: ", min_acc),
    ("Avg Train Percent: ", avg_train_percent),
    ("Best Performance Train Percent: ", best_train_percent),
    ("Difference (Max, Min): ", diff),
]:
    print(label, value)

In [None]:
# Rank the runs by test accuracy and keep the k best and the k worst.
k = 10
best_df = df.sort_values('Test Acc@1', ascending=False).iloc[:k].reset_index(drop=True)
worst_df = df.sort_values('Test Acc@1', ascending=True).iloc[:k].reset_index(drop=True)

print(len(best_df), len(worst_df))
best_df

In [None]:
# Per-block selection counts among the k best and the k worst runs.
# This reuses create_best_worst_vectors, which applies the same ranking
# (sort by 'Test Acc@1', take k rows) and summation as the previous
# copy-pasted loops.
best_vector, worst_vector = create_best_worst_vectors(df, k=k)

best_vector, worst_vector

### How many times was each block selected?

In [None]:
# Count, per block, in how many runs its entry in the saved vector was set.
sum_vec = np.array([0] * 12)

for saved_path in df['Vector Path']:
    sum_vec += np.load(saved_path)

In [None]:
indices = np.arange(len(sum_vec))

# `plt.figsize = ...` only sets an unused attribute on the pyplot module;
# the size has to be passed when the figure is created.
plt.figure(figsize=(20, 10))
plt.bar(indices, sum_vec)

# Labels, integer ticks and title
plt.xlabel('Block Index')
plt.ylabel('Number of Times Trainable')
plt.xticks(indices)
plt.yticks(np.arange(0, max(sum_vec) + 1))
plt.title('Bar Plot of the number of times each attention block was activated (in 50 runs) in a ViT-Base Model.')

# Save the figure
plt.tight_layout()
plt.savefig("Random_Attention_Block_Tuning_{}.png".format(dataset))

In [None]:
indices = np.arange(len(best_vector))
bar_width = 0.4

# `plt.figsize = ...` only sets an unused attribute on the pyplot module;
# the size has to be passed when the figure is created.
plt.figure(figsize=(20, 10))
# Draw the two series side by side; at identical x positions the second
# series would cover the first wherever it is at least as tall.
plt.bar(indices - bar_width / 2, best_vector, width=bar_width, label='Best')
plt.bar(indices + bar_width / 2, worst_vector, width=bar_width, label='Worst')
plt.legend()

# Labels, integer ticks (covering both series) and title
plt.xlabel('Block Index')
plt.ylabel('Block Selection Count')
plt.xticks(indices)
plt.yticks(np.arange(0, max(max(best_vector), max(worst_vector)) + 1))
plt.title('Comparing the attention block selection frequency for best and worst performing vectors.')

# Save the figure
plt.tight_layout()
plt.savefig("Selection_Comparison_{}.png".format(dataset))

### SMDG Dataset

In [None]:
dataset = 'smdg'
base_path = '/home/co-dutt1/rds/hpc-work/Layer-Masking/'
# Results CSV of the random attention-block tuning runs on this dataset.
csv = f"{base_path}vit_base/{dataset}/tune_attention_blocks_random_vit_base_0.0001.csv"

# 'Vector Path' is used exactly as stored in the CSV.
df = pd.read_csv(csv)
df.head()

In [None]:
# Summary statistics of the test accuracy over all runs.
acc = df['Test Acc@1']
mean_acc, std_acc = acc.mean(), acc.std()
max_acc, min_acc = acc.max(), acc.min()
avg_train_percent = df['Train Percent'].mean()
# Train percentage of the first run that reached the maximum accuracy.
best_train_percent = df.loc[acc == max_acc, 'Train Percent'].values[0]
diff = max_acc - min_acc

for label, value in [
    ("Mean Acc: ", mean_acc),
    ("Std Acc: ", std_acc),
    ("Max Acc: ", max_acc),
    ("Min Acc: ", min_acc),
    ("Avg Train Percent: ", avg_train_percent),
    ("Best Performance Train Percent: ", best_train_percent),
    ("Difference (Max, Min): ", diff),
]:
    print(label, value)

In [None]:
# Rank the runs by test accuracy and keep the k best and the k worst.
k = 10
best_df = df.sort_values('Test Acc@1', ascending=False).iloc[:k].reset_index(drop=True)
worst_df = df.sort_values('Test Acc@1', ascending=True).iloc[:k].reset_index(drop=True)

print(len(best_df), len(worst_df))
best_df

In [None]:
# Per-block selection counts among the k best and the k worst runs.
# This reuses create_best_worst_vectors, which applies the same ranking
# (sort by 'Test Acc@1', take k rows) and summation as the previous
# copy-pasted loops.
best_vector, worst_vector = create_best_worst_vectors(df, k=k)

best_vector, worst_vector

### How many times was each block selected?


In [None]:
# Count, per block, in how many runs its entry in the saved vector was set.
sum_vec = np.array([0] * 12)

for saved_path in df['Vector Path']:
    sum_vec += np.load(saved_path)

In [None]:
indices = np.arange(len(sum_vec))

# `plt.figsize = ...` only sets an unused attribute on the pyplot module;
# the size has to be passed when the figure is created.
plt.figure(figsize=(20, 10))
plt.bar(indices, sum_vec)

# Labels, integer ticks and title
plt.xlabel('Block Index')
plt.ylabel('Number of Times Trainable')
plt.xticks(indices)
plt.yticks(np.arange(0, max(sum_vec) + 1))
plt.title('Bar Plot of the number of times each attention block was activated (in 50 runs) in a ViT-Base Model.')

# Save the figure
plt.tight_layout()
plt.savefig("Random_Attention_Block_Tuning_{}.png".format(dataset))

In [None]:
indices = np.arange(len(best_vector))
bar_width = 0.4

# `plt.figsize = ...` only sets an unused attribute on the pyplot module;
# the size has to be passed when the figure is created.
plt.figure(figsize=(20, 10))
# Draw the two series side by side; at identical x positions the second
# series would cover the first wherever it is at least as tall.
plt.bar(indices - bar_width / 2, best_vector, width=bar_width, label='Best')
plt.bar(indices + bar_width / 2, worst_vector, width=bar_width, label='Worst')
plt.legend()

# Labels, integer ticks (covering both series) and title
plt.xlabel('Block Index')
plt.ylabel('Block Selection Count')
plt.xticks(indices)
plt.yticks(np.arange(0, max(max(best_vector), max(worst_vector)) + 1))
plt.title('Comparing the attention block selection frequency for best and worst performing vectors.')

# Save the figure
plt.tight_layout()
plt.savefig("Selection_Comparison_{}.png".format(dataset))

### HAM10000 Dataset

In [None]:
dataset = 'HAM10000'
base_path = '/home/co-dutt1/rds/hpc-work/Layer-Masking/'
vector_path = '/home/co-dutt1/rds/hpc-work/Layer-Masking/saved_vectors/vit_base/HAM10000/tune_attention_blocks_random_0.0001/'
# Results CSV of the random attention-block tuning runs on this dataset.
csv = f"{base_path}vit_base/{dataset}/tune_attention_blocks_random_vit_base_0.0001.csv"

df = pd.read_csv(csv)
# Re-root every stored vector path onto `vector_path`, keeping only its file name.
df['Vector Path'] = df['Vector Path'].map(
    lambda stored: os.path.join(vector_path, stored.rsplit('/', 1)[-1])
)
df.head()

In [None]:
# Summary statistics of the test accuracy over all runs.
acc = df['Test Acc@1']
mean_acc, std_acc = acc.mean(), acc.std()
max_acc, min_acc = acc.max(), acc.min()
avg_train_percent = df['Train Percent'].mean()
# Train percentage of the first run that reached the maximum accuracy.
best_train_percent = df.loc[acc == max_acc, 'Train Percent'].values[0]
diff = max_acc - min_acc

for label, value in [
    ("Mean Acc: ", mean_acc),
    ("Std Acc: ", std_acc),
    ("Max Acc: ", max_acc),
    ("Min Acc: ", min_acc),
    ("Avg Train Percent: ", avg_train_percent),
    ("Best train percent: ", best_train_percent),
    ("Difference (Max, Min): ", diff),
]:
    print(label, value)

In [None]:
# Rank the runs by test accuracy and keep the k best and the k worst.
k = 10
best_df = df.sort_values('Test Acc@1', ascending=False).iloc[:k].reset_index(drop=True)
worst_df = df.sort_values('Test Acc@1', ascending=True).iloc[:k].reset_index(drop=True)

print(len(best_df), len(worst_df))
worst_df

In [None]:
# Per-block selection counts among the k best and the k worst runs.
# This reuses create_best_worst_vectors, which applies the same ranking
# (sort by 'Test Acc@1', take k rows) and summation as the previous
# copy-pasted loops. It also leaves the `vector_path` directory variable
# untouched, which the inline loops used to overwrite with a file path.
best_vector, worst_vector = create_best_worst_vectors(df, k=k)

best_vector, worst_vector

### How many times was each block selected?

In [None]:
# Count, per block, in how many runs its entry in the saved vector was set.
sum_vec = np.array([0] * 12)

for saved_path in df['Vector Path']:
    sum_vec += np.load(saved_path)

In [None]:
indices = np.arange(len(sum_vec))

# `plt.figsize = ...` only sets an unused attribute on the pyplot module;
# the size has to be passed when the figure is created.
plt.figure(figsize=(20, 10))
plt.bar(indices, sum_vec)

# Labels, integer ticks and title
plt.xlabel('Block Index')
plt.ylabel('Number of Times Trainable')
plt.xticks(indices)
plt.yticks(np.arange(0, max(sum_vec) + 1))
plt.title('Bar Plot of the number of times each attention block was activated (in 50 runs) in a ViT-Base Model.')

# Save the figure
plt.tight_layout()
plt.savefig("Random_Attention_Block_Tuning_{}.png".format(dataset))

In [None]:
indices = np.arange(len(best_vector))
bar_width = 0.4

# `plt.figsize = ...` only sets an unused attribute on the pyplot module;
# the size has to be passed when the figure is created.
plt.figure(figsize=(20, 10))
# Draw the two series side by side; at identical x positions the second
# series would cover the first wherever it is at least as tall.
plt.bar(indices - bar_width / 2, best_vector, width=bar_width, label='Best')
plt.bar(indices + bar_width / 2, worst_vector, width=bar_width, label='Worst')
plt.legend()

# Labels, integer ticks (covering both series) and title
plt.xlabel('Block Index')
plt.ylabel('Block Selection Count')
plt.xticks(indices)
plt.yticks(np.arange(0, max(max(best_vector), max(worst_vector)) + 1))
plt.title('Comparing the attention block selection frequency for best and worst performing vectors.')

# Save the figure
plt.tight_layout()
plt.savefig("Selection_Comparison_{}.png".format(dataset))

### CIFAR10

In [None]:
dataset = 'CIFAR10'
base_path = '/home/co-dutt1/rds/hpc-work/Layer-Masking/'
# Results CSV of the random attention-block tuning runs on this dataset.
csv = f"{base_path}vit_base/{dataset}/tune_attention_blocks_random_vit_base_0.0001.csv"

# 'Vector Path' is used exactly as stored in the CSV.
df = pd.read_csv(csv)
df.head()

In [None]:
# Summary statistics of the test accuracy over all runs.
acc = df['Test Acc@1']
mean_acc, std_acc = acc.mean(), acc.std()
max_acc, min_acc = acc.max(), acc.min()
avg_train_percent = df['Train Percent'].mean()
# Train percentage of the first run that reached the maximum accuracy.
best_train_percent = df.loc[acc == max_acc, 'Train Percent'].values[0]
diff = max_acc - min_acc

for label, value in [
    ("Mean Acc: ", mean_acc),
    ("Std Acc: ", std_acc),
    ("Max Acc: ", max_acc),
    ("Min Acc: ", min_acc),
    ("Avg Train Percent: ", avg_train_percent),
    ("Best train percent: ", best_train_percent),
    ("Difference (Max, Min): ", diff),
]:
    print(label, value)

In [None]:
# Rank the runs by test accuracy and keep the k best and the k worst.
k = 10
best_df = df.sort_values('Test Acc@1', ascending=False).iloc[:k].reset_index(drop=True)
worst_df = df.sort_values('Test Acc@1', ascending=True).iloc[:k].reset_index(drop=True)

print(len(best_df), len(worst_df))
worst_df

In [None]:
# Per-block selection counts among the k best and the k worst runs.
# This reuses create_best_worst_vectors, which applies the same ranking
# (sort by 'Test Acc@1', take k rows) and summation as the previous
# copy-pasted loops.
best_vector, worst_vector = create_best_worst_vectors(df, k=k)

best_vector, worst_vector

### How many times was each block selected?

In [None]:
# Count, per block, in how many runs its entry in the saved vector was set.
sum_vec = np.array([0] * 12)

for saved_path in df['Vector Path']:
    sum_vec += np.load(saved_path)

In [None]:
indices = np.arange(len(sum_vec))

# `plt.figsize = ...` only sets an unused attribute on the pyplot module;
# the size has to be passed when the figure is created.
plt.figure(figsize=(20, 10))
plt.bar(indices, sum_vec)

# Labels, integer ticks and title
plt.xlabel('Block Index')
plt.ylabel('Number of Times Trainable')
plt.xticks(indices)
plt.yticks(np.arange(0, max(sum_vec) + 1))
plt.title('Bar Plot of the number of times each attention block was activated (in 50 runs) in a ViT-Base Model.')

# Save the figure
plt.tight_layout()
plt.savefig("Random_Attention_Block_Tuning_{}.png".format(dataset))

In [None]:
indices = np.arange(len(best_vector))
bar_width = 0.4

# `plt.figsize = ...` only sets an unused attribute on the pyplot module;
# the size has to be passed when the figure is created.
plt.figure(figsize=(20, 10))
# Draw the two series side by side; at identical x positions the second
# series would cover the first wherever it is at least as tall.
plt.bar(indices - bar_width / 2, best_vector, width=bar_width, label='Best')
plt.bar(indices + bar_width / 2, worst_vector, width=bar_width, label='Worst')
plt.legend()

# Labels, integer ticks (covering both series) and title
plt.xlabel('Block Index')
plt.ylabel('Block Selection Count')
plt.xticks(indices)
plt.yticks(np.arange(0, max(max(best_vector), max(worst_vector)) + 1))
plt.title('Comparing the attention block selection frequency for best and worst performing vectors.')

# Save the figure
plt.tight_layout()
plt.savefig("Selection_Comparison_{}.png".format(dataset))

### Retinopathy Dataset

In [None]:
dataset = 'retinopathy'
base_path = '/home/co-dutt1/rds/hpc-work/Layer-Masking/'
# Results CSV of the random attention-block tuning runs on this dataset.
csv = f"{base_path}vit_base/{dataset}/tune_attention_blocks_random_vit_base_0.0001.csv"

# 'Vector Path' is used exactly as stored in the CSV.
df = pd.read_csv(csv)
df.head()

In [None]:
# Summary statistics of the test accuracy over all runs.
acc = df['Test Acc@1']
mean_acc, std_acc = acc.mean(), acc.std()
max_acc, min_acc = acc.max(), acc.min()
avg_train_percent = df['Train Percent'].mean()
# Train percentage of the first run that reached the maximum accuracy.
best_train_percent = df.loc[acc == max_acc, 'Train Percent'].values[0]
diff = max_acc - min_acc

for label, value in [
    ("Mean Acc: ", mean_acc),
    ("Std Acc: ", std_acc),
    ("Max Acc: ", max_acc),
    ("Min Acc: ", min_acc),
    ("Avg Train Percent: ", avg_train_percent),
    ("Best Train Percent: ", best_train_percent),
    ("Difference (Max, Min): ", diff),
]:
    print(label, value)

In [None]:
# Rank the runs by test accuracy and keep the k best and the k worst.
k = 10
best_df = df.sort_values('Test Acc@1', ascending=False).iloc[:k].reset_index(drop=True)
worst_df = df.sort_values('Test Acc@1', ascending=True).iloc[:k].reset_index(drop=True)

print(len(best_df), len(worst_df))
worst_df

In [None]:
# Per-block selection counts among the k best and the k worst runs.
# This reuses create_best_worst_vectors, which applies the same ranking
# (sort by 'Test Acc@1', take k rows) and summation as the previous
# copy-pasted loops.
best_vector, worst_vector = create_best_worst_vectors(df, k=k)

best_vector, worst_vector

### How many times was each block selected?

In [None]:
# Count, per block, in how many runs its entry in the saved vector was set.
sum_vec = np.array([0] * 12)

for saved_path in df['Vector Path']:
    sum_vec += np.load(saved_path)

In [None]:
indices = np.arange(len(sum_vec))

# `plt.figsize = ...` only sets an unused attribute on the pyplot module;
# the size has to be passed when the figure is created.
plt.figure(figsize=(20, 10))
plt.bar(indices, sum_vec)

# Labels, integer ticks and title
plt.xlabel('Block Index')
plt.ylabel('Number of Times Trainable')
plt.xticks(indices)
plt.yticks(np.arange(0, max(sum_vec) + 1))
plt.title('Bar Plot of the number of times each attention block was activated (in 50 runs) in a ViT-Base Model.')

# Save the figure
plt.tight_layout()
plt.savefig("Random_Attention_Block_Tuning_{}.png".format(dataset))

In [None]:
indices = np.arange(len(best_vector))
bar_width = 0.4

# `plt.figsize = ...` only sets an unused attribute on the pyplot module;
# the size has to be passed when the figure is created.
plt.figure(figsize=(20, 10))
# Draw the two series side by side; at identical x positions the second
# series would cover the first wherever it is at least as tall.
plt.bar(indices - bar_width / 2, best_vector, width=bar_width, label='Best')
plt.bar(indices + bar_width / 2, worst_vector, width=bar_width, label='Worst')
plt.legend()

# Labels, integer ticks (covering both series) and title
plt.xlabel('Block Index')
plt.ylabel('Block Selection Count')
plt.xticks(indices)
plt.yticks(np.arange(0, max(max(best_vector), max(worst_vector)) + 1))
plt.title('Comparing the attention block selection frequency for best and worst performing vectors.')

# Save the figure
plt.tight_layout()
plt.savefig("Selection_Comparison_{}.png".format(dataset))

### CIFAR100

### Pneumonia

### Cosine Similarity b/w best performing vectors

In [None]:
base_path = '/home/co-dutt1/rds/hpc-work/Layer-Masking/'
vector_path = '/home/co-dutt1/rds/hpc-work/Layer-Masking/saved_vectors/vit_base/HAM10000/tune_attention_blocks_random_0.0001/'

# Read the results CSV of every dataset, in the same order as before.
loaded = {}
for dataset in ['breastUS', 'fitzpatrick', 'HAM10000', 'smdg', 'retinopathy', 'CIFAR10', 'CIFAR100', 'pneumonia']:
    csv = f"{base_path}vit_base/{dataset}/tune_attention_blocks_random_vit_base_0.0001.csv"
    loaded[dataset] = pd.read_csv(csv)

df_breastUS = loaded['breastUS']
df_fitzpatrick = loaded['fitzpatrick']
df_ham10k = loaded['HAM10000']
df_smdg = loaded['smdg']
df_retinopathy = loaded['retinopathy']
df_CIFAR10 = loaded['CIFAR10']
df_CIFAR100 = loaded['CIFAR100']
df_pneumonia = loaded['pneumonia']

# Only HAM10000 is re-rooted: keep the file name of each stored path and
# join it onto `vector_path`.
df_ham10k['Vector Path'] = df_ham10k['Vector Path'].map(
    lambda stored: os.path.join(vector_path, stored.rsplit('/', 1)[-1])
)


In [None]:
# Aggregate, over all datasets, how often each block is selected among the
# k best and the k worst runs.
k = 10

all_datasets = [df_breastUS, df_fitzpatrick, df_ham10k, df_smdg, df_retinopathy, df_CIFAR10, df_CIFAR100, df_pneumonia]
_best = np.array([0]*12)
_worst = np.array([0]*12)

# The loop names are deliberately not `df` / `best_vector` / `worst_vector`,
# so the per-dataset variables of the earlier cells are not overwritten.
# `k` is passed explicitly so that changing it above takes effect.
for dataset_df in all_datasets:
    top_counts, bottom_counts = create_best_worst_vectors(dataset_df, k=k)
    _best += top_counts
    _worst += bottom_counts

_best

In [None]:
# Plot the aggregated best / worst selection counts per block.

indices = np.arange(len(_best))
bar_width = 0.4

# `plt.figsize = ...` only sets an unused attribute on the pyplot module;
# the size has to be passed when the figure is created.
plt.figure(figsize=(20, 15))
# Draw the two series side by side; at identical x positions the second
# series would cover the first wherever it is at least as tall.
plt.bar(indices - bar_width / 2, _best, width=bar_width, label='Best')
plt.bar(indices + bar_width / 2, _worst, width=bar_width, label='Worst')
plt.legend()

# Labels, ticks (every 4 counts, covering both series) and title
plt.xlabel('Block Index')
plt.ylabel('Block Selection Count')
plt.xticks(indices)
plt.yticks(np.arange(0, max(max(_best), max(_worst)) + 1, 4))
plt.title('Comparing the attention block selection frequency for best and worst performing vectors.')

# Save the figure
plt.tight_layout()
plt.savefig("vit-b_best_worst_block_level.png")

In [None]:
# Best test accuracy reached on each dataset.
(
    max_acc_breastUS,
    max_acc_fitzpatrick,
    max_acc_ham10k,
    max_acc_smdg,
    max_acc_retinopathy,
    max_acc_CIFAR10,
    max_acc_CIFAR100,
    max_acc_pneumonia,
) = (
    frame['Test Acc@1'].max()
    for frame in (
        df_breastUS,
        df_fitzpatrick,
        df_ham10k,
        df_smdg,
        df_retinopathy,
        df_CIFAR10,
        df_CIFAR100,
        df_pneumonia,
    )
)

In [None]:
# For each dataset, load the saved vector of the first run that reached the
# best test accuracy.
(
    best_vector_breastUS,
    best_vector_fitzpatrick,
    best_vector_ham10k,
    best_vector_smdg,
    best_vector_retinopathy,
    best_vector_CIFAR10,
    best_vector_CIFAR100,
    best_vector_pneumonia,
) = (
    np.load(frame.loc[frame['Test Acc@1'] == top_acc, 'Vector Path'].values[0])
    for frame, top_acc in (
        (df_breastUS, max_acc_breastUS),
        (df_fitzpatrick, max_acc_fitzpatrick),
        (df_ham10k, max_acc_ham10k),
        (df_smdg, max_acc_smdg),
        (df_retinopathy, max_acc_retinopathy),
        (df_CIFAR10, max_acc_CIFAR10),
        (df_CIFAR100, max_acc_CIFAR100),
        (df_pneumonia, max_acc_pneumonia),
    )
)

all_best_vectors = [best_vector_breastUS, best_vector_fitzpatrick, best_vector_ham10k, best_vector_smdg, best_vector_retinopathy, best_vector_CIFAR10, best_vector_CIFAR100, best_vector_pneumonia]

# Separate list object holding the same vectors, in the same order.
vectors = list(all_best_vectors)

In [None]:
# Pairwise cosine similarity (1 - cosine distance) between the best vectors.
num_vectors = len(vectors)
similarity_matrix = np.zeros((num_vectors, num_vectors))

for i, vec_i in enumerate(vectors):
    for j, vec_j in enumerate(vectors):
        cos_similarity = 1 - distance.cosine(vec_i, vec_j)
        similarity_matrix[i, j] = cos_similarity
        print("Cosine Sim b/w vector {} and {}: ".format(i, j), cos_similarity)

similarity_df = pd.DataFrame(similarity_matrix)

In [None]:
# Draw the cosine-similarity matrix as a seaborn heatmap and save it as a figure.
tuning_method = 'tune_attention_blocks_random'
top_k = 10
# One label per row/column of the similarity matrix, in the order in which the
# best vectors were collected (all eight datasets, including CIFAR100 and
# Pneumonia, which previously had no tick label).
datasets = ['BreastUS', 'Fitzpatrick', 'HAM10000', 'SMDG', 'Retinopathy', 'CIFAR10', 'CIFAR100', 'Pneumonia']
assert len(datasets) == len(similarity_df), "one label is needed per similarity-matrix row"
# Heatmap cells are one unit wide, so cell centres sit at 0.5, 1.5, ...
pos = np.arange(len(datasets)) + 0.5

plt.figure(figsize=(10, 10))

sns.heatmap(similarity_df, annot=True, fmt='.2f', cmap='coolwarm')
plt.xlabel("Vectors")
plt.ylabel("Vectors")
plt.xticks(pos, datasets)
plt.yticks(pos, datasets)
plt.title("Cos-Sim b/w best vectors")
plt.savefig('cosine_similarity_cm_{}_top_{}.png'.format(tuning_method, str(top_k)))