In [1]:
import numpy as np

# Open the precomputed feature archive (the commented line is the alternative input file)
data = np.load('../data/caltech-256_features.npz')
# data = np.load('imagenet_features.npz')

# Per-image feature matrices for both models, plus the label array
X_vit, X_clip, y = data['vit_features'], data['clip_features'], data['labels']

# Sorted distinct labels; their count is the number of classes
labels = np.unique(y)
num_classes = labels.size # 257

# Row indices belonging to each label, computed once and shared by both models
class_rows = [np.nonzero(y == label)[0] for label in labels]

# Feature vectors split per class, in the same order as `labels`
vit_class = [X_vit[rows] for rows in class_rows]
clip_class = [X_clip[rows] for rows in class_rows]

# One mean feature vector per class
vit_mean = [group.mean(axis=0) for group in vit_class]
clip_mean = [group.mean(axis=0) for group in clip_class]

In [2]:
# Display how many distinct labels were found in the label array
num_classes

257

In [3]:
# Mean ViT feature vector over all images
center = X_vit.mean(axis=0)
# Squared Euclidean norm of that mean vector
np.square(center).sum()

16471.045

In [4]:
# Largest squared Euclidean norm among the individual ViT feature vectors
np.square(X_vit).sum(axis=1).max()

244925.12

In [None]:
from scipy.spatial.distance import pdist, squareform

# Condensed pairwise distances between the class-mean vectors
# (pdist is called with its default metric)
vit_condensed = pdist(vit_mean)
clip_condensed = pdist(clip_mean)

# Expand each condensed vector into a full square, symmetric matrix
vit_distance_matrix = squareform(vit_condensed)
clip_distance_matrix = squareform(clip_condensed)

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Cosine similarity between every pair of class-mean vectors, one square matrix per model
vit_similarities, clip_similarities = map(cosine_similarity, (vit_mean, clip_mean))

In [None]:
import json
# Load the class names that the neighbour printout further down indexes by row number.
# The encoding is given explicitly so decoding does not depend on the platform's locale.
# NOTE(review): the file name says ImageNet while the features loaded at the top come from
# the Caltech-256 archive -- confirm these names match the feature file actually in use.
with open('imagenet_labels.json', encoding='utf-8') as f:
    classes = json.load(f)
# Quick sanity check of the first few entries
print(classes[:10])

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import mplcursors
%matplotlib widget

# Load label to class mapping from CSV file
class_labels = pd.read_csv('256_ObjectCategories_map.csv')

# Class names as a plain array, indexable by matrix row/column number.
# Assumes the CSV rows are in the same order as the rows of the matrix -- TODO confirm.
class_names = class_labels['class'].to_numpy()

# Matrix shown in this figure and the model name used in its title.
# Change both together so the title always describes the data on screen.
distance_matrix = clip_distance_matrix
embedding_name = 'CLIP'

# For every class: column indices of its num_top smallest distances, and the matching names.
# The diagonal of the matrix is zero, so a class is normally its own first entry.
num_top = 5
top_distances = np.argsort(distance_matrix, axis=1)[:, :num_top]
top_labels = class_names[top_distances]

# Set up plot with larger size
fig, ax = plt.subplots(figsize=(15, 10))

# Plot distance matrix
im = ax.imshow(distance_matrix)

# Attach an interactive cursor to the image
cursor = mplcursors.cursor(im)

# Per-cell text and (x, y) lookup tables. The cursor callback below does not use them;
# they are kept so the names remain available to other cells.
annotations = [[f"{class_names[i]}\n{class_names[j]}: {distance_matrix[i,j]:.3f}"
                for j in range(num_classes)] for i in range(num_classes)]
positions = [(j, i) for i in range(num_classes) for j in range(num_classes)]


@cursor.connect("add")
def on_add(sel):
    """Fill the annotation box for the picked cell.

    The text shows the row class and column class of the picked cell, followed by
    the row class's ``num_top`` nearest classes with their distances.

    Parameters
    ----------
    sel : mplcursors.Selection
        The selection being added. For an image artist ``sel.index`` is the
        ``(row, column)`` of the picked cell.
    """
    row, col = sel.index
    nearest = '\n'.join(
        f"{class_names[k]}: {distance_matrix[row, k]:.3f}" for k in top_distances[row]
    )
    text = f"{class_names[row]}, {class_names[col]}\n\n{nearest}"
    # Only the text and its styling are overridden; mplcursors keeps control of
    # where the box is placed relative to the picked cell.
    sel.annotation.set(text=text, ha='left', va='center', fontsize=10)


# Set tick labels (raise `step` to thin out the x-axis labels)
step = 1
ticks = range(num_classes)
ax.set_xticks(ticks[::step])
ax.set_yticks(ticks)
ax.set_xticklabels([class_names[i] for i in ticks[::step]], rotation=45, ha='right', fontsize=10)
ax.set_yticklabels([class_names[i] for i in ticks], fontsize=10)

# Set axis labels and title
ax.set_xlabel('Class', fontsize=12)
ax.set_ylabel('Class', fontsize=12)
fig.suptitle(f'{embedding_name} Embedding - dist', fontsize=14)

cb = fig.colorbar(im, ax=ax)
cb.set_label('Distances')

plt.show()

In [None]:
# Average over axis 0: one mean distance per column (class) of the CLIP distance matrix.
# The zero self-distance on the diagonal is included in each average.
clip_distance_matrix.mean(0)

In [None]:
import matplotlib.pyplot as plt

# New figure with a single axes
fig, ax = plt.subplots()

# 20-bin histogram of row 13 of the CLIP distance matrix
# (distances from class index 13 to every class, itself included)
ax.hist(clip_distance_matrix[13], bins=20)

# Axis labels and title in one call
ax.set(xlabel='Value', ylabel='Count', title='Histogram of Values')

# Write the figure to disk
fig.savefig('clip_dist_hist.png')

In [None]:
# For each column of the CLIP distance matrix, the row index holding the largest distance,
# i.e. the class whose mean vector is farthest from that column's class
clip_distance_matrix.argmax(0)

In [None]:
# Matrices to rank: cosine similarities, where a larger value means a closer class.
# (To rank with clip_distance_matrix / vit_distance_matrix instead, sort ascending,
# i.e. drop the negation in the argsort calls below.)
clip_matrix = clip_similarities
vit_matrix = vit_similarities

# Read the class-to-label mapping from a CSV file with 'id' and 'class' columns
# class_labels = pd.read_csv('256_ObjectCategories_map.csv')

# classes = [str(i) + ":" + class_labels.loc[i]['class'] for i in range(num_classes)]

# NOTE(review): `classes` must hold one name per matrix row, in row order -- confirm it was
# loaded for the same dataset as the features behind these matrices.

# Number of neighbours reported per class
top_k = 5

# Column indices of each row, sorted by descending similarity
clip_indices = np.argsort(-clip_matrix, axis=1)
vit_indices = np.argsort(-vit_matrix, axis=1)

# Print the top_k neighbours and their similarities for each class, once per model
for i in range(clip_matrix.shape[0]):
    # Remove the class itself explicitly instead of assuming it sorted into first place
    clip_top = clip_indices[i][clip_indices[i] != i][:top_k]
    vit_top = vit_indices[i][vit_indices[i] != i][:top_k]

    print(f"top{top_k} for class {i}: {classes[i]}")
    print(f"\tclip: {[classes[j] for j in clip_top]}")
    # float() keeps the printed list free of NumPy scalar wrappers
    print(f"\tcosine: {[round(float(clip_matrix[i, j]), 3) for j in clip_top]}")
    print(f"\tViT: {[classes[j] for j in vit_top]}")
    # ViT similarities are looked up with the ViT ranking, matching the names on the line above
    print(f"\tcosine: {[round(float(vit_matrix[i, j]), 3) for j in vit_top]}")

In [None]:
# List the column names of the label-mapping table read from the CSV file
print(class_labels.columns)