In [1]:
from src.data.dataset import get_data, base_novel_categories, split_dataset, CLASS_NAMES
from src.models.clip_wrapper import load_clip_model
from src.training.evaluation import eval, linear_probe_evaluation
from src.utils.metrics import harmonic_mean

# CLIP zero-shot baseline performance on Flowers102

In [2]:
# Load the CLIP ViT-B/16 checkpoint. The helper also hands back the image
# preprocessing transform and the device that the later cells pass around.
model, preprocess, device = load_clip_model(model_name="ViT-B/16")

# Build the train/val/test datasets with CLIP's own preprocessing applied.
train_set, val_set, test_set = get_data(transform=preprocess)

# Derive the base/novel class groups from the training set.
# NOTE(review): how the classes are divided is decided inside
# base_novel_categories — confirm there.
base_classes, novel_classes = base_novel_categories(train_set)

# Split every dataset into a base part and a novel part using base_classes.
# Presumably samples whose label is in base_classes land in the first result
# and the rest in the second — verify against split_dataset.
train_base, train_novel = split_dataset(train_set, base_classes)
# The novel part of the validation split is discarded here.
val_base, _ = split_dataset(val_set, base_classes)
test_base, test_novel = split_dataset(test_set, base_classes)

In [3]:
# Zero-shot evaluation of the model on the test split, run once for the base
# classes and once for the novel classes. `eval` is the project helper imported
# from src.training.evaluation, not Python's builtin.
# Arguments common to both runs are gathered once so the two calls differ only
# in dataset, class list and progress-bar label.
zero_shot_kwargs = dict(model=model, batch_size=128, device=device)

base_accuracy = eval(
    dataset=test_base,
    categories=base_classes,
    label="🧠 Zero-shot evaluation on Base Classes",
    **zero_shot_kwargs,
)
novel_accuracy = eval(
    dataset=test_novel,
    categories=novel_classes,
    label="🧠 Zero-shot evaluation on Novel Classes",
    **zero_shot_kwargs,
)

# Blank line between the progress bars and the summary.
print()
print(f"🔍 Base classes accuracy: {base_accuracy*100:.2f}%")
print(f"🔍 Novel classes accuracy: {novel_accuracy*100:.2f}%")

# Combine the two accuracies into a single score; it is scaled by 100 for
# display in the same way as the two lines above.
base_novel_hm = harmonic_mean(base_accuracy, novel_accuracy)
print(f"🔍 Harmonic Mean: {base_novel_hm*100:.2f}%")

🧠 Zero-shot evaluation on Base Classes: 100%|██████████| 20/20 [00:08<00:00,  2.38it/s]
🧠 Zero-shot evaluation on Novel Classes: 100%|██████████| 29/29 [00:12<00:00,  2.38it/s]


🔍 Base classes accuracy: 71.25%
🔍 Novel classes accuracy: 78.26%
🔍 Harmonic Mean: 74.59%





# Linear Probe Evaluation

The purpose of linear probe evaluation is to assess the quality of the features (embeddings) the model produces for the dataset of interest. We extract the embeddings, collect them together with their labels, and then fit a simple classifier on top of them (usually logistic regression or a single linear layer + softmax). The performance of this simple model gives an idea of how well the network is able to separate the categories in the feature space.

In [4]:
# Linear probe on the base classes: the model is passed together with the
# base-class training and test splits, processed in batches of 32.
# The value displayed under this cell is the call's return value
# (presumably the probe's test accuracy in percent — confirm in
# src.training.evaluation).
linear_probe_evaluation(model, train_base, test_base, batch_size=32)

🖼️Extracting features of training set: 100%|██████████| 16/16 [00:01<00:00,  8.16it/s]
🖼️Extracting features of test set: 100%|██████████| 78/78 [00:07<00:00, 10.61it/s]


np.float64(95.63283461382936)

In [5]:
# Linear probe on the novel classes: the model is passed together with the
# novel-class training and test splits, processed in batches of 32.
# The value displayed under this cell is the call's return value
# (presumably the probe's test accuracy in percent — confirm in
# src.training.evaluation).
linear_probe_evaluation(model, train_novel, test_novel, batch_size=32)

🖼️Extracting features of training set: 100%|██████████| 16/16 [00:01<00:00,  8.85it/s]
🖼️Extracting features of test set: 100%|██████████| 115/115 [00:11<00:00, 10.38it/s]


np.float64(97.93253536452666)