In [5]:
import os
import torch
import numpy as np
from tqdm import tqdm
from torch.utils.data import DataLoader
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
)


# 1. Load CLIP model

In [9]:
import torch
import open_clip

# Run on the GPU when torch can see one, otherwise stay on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Architecture / checkpoint identifiers handed to open_clip.
CLIP_ARCH = 'ViT-B-16'
CLIP_WEIGHTS = 'openai'

# The factory returns a triple; only the model (first element) and the
# transform bound to `preprocess` (third element) are used in this notebook.
model, _, preprocess = open_clip.create_model_and_transforms(
    CLIP_ARCH,
    pretrained=CLIP_WEIGHTS,
)

# Move the weights to the chosen device and switch to inference mode
# (both calls return the module itself, so they can be chained).
model = model.to(device).eval()

print("Loaded OpenCLIP model (open_clip library)")


Loaded OpenCLIP model (open_clip library)


# 2. Load CIFAR100

In [None]:
from torchvision.datasets import CIFAR100

# Dataset files are stored under the user's cache directory.
root = os.path.expanduser("~/.cache")

# Both splits go through the CLIP `preprocess` transform so the images match
# what the image encoder expects. `download=True` fetches the archive only
# when it is not already present under `root`.
train_ds = CIFAR100(root, download=True, train=True, transform=preprocess)
test_ds  = CIFAR100(root, download=True, train=False, transform=preprocess)

Files already downloaded and verified
Files already downloaded and verified


# 3. Feature Extractor

In [11]:
def extract_features(dataset, batch_size=64):
    """Encode every image in ``dataset`` and return features plus labels.

    Relies on the notebook-level ``model`` (must expose ``encode_image``) and
    ``device`` variables.

    Args:
        dataset: Indexable dataset yielding ``(image, label)`` pairs that a
            ``DataLoader`` can batch.
        batch_size: Number of samples sent through the encoder at once.

    Returns:
        ``(X, y)`` as NumPy arrays: ``X`` holds one L2-normalised feature
        vector per sample, ``y`` the matching labels, both in dataset order
        (the loader does not shuffle).
    """
    # shuffle=False keeps features aligned with the dataset's own ordering.
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    feature_chunks = []
    label_chunks = []

    # Gradients are never needed here; disabling them saves memory.
    with torch.no_grad():
        for batch_images, batch_labels in tqdm(loader, desc="Extracting features"):
            embeddings = model.encode_image(batch_images.to(device))

            # Scale each embedding to unit L2 norm along the feature axis.
            unit_embeddings = embeddings / embeddings.norm(dim=-1, keepdim=True)

            # Bring features back to host memory before accumulating them.
            feature_chunks.append(unit_embeddings.cpu())
            label_chunks.append(batch_labels)

    features = torch.cat(feature_chunks).numpy()
    targets = torch.cat(label_chunks).numpy()
    return features, targets


# 4. Run feature extraction

In [12]:
# Encode the training split first, then the held-out split. Each call returns
# a (features, labels) pair of NumPy arrays.
train_features, train_labels = extract_features(dataset=train_ds)
test_features, test_labels = extract_features(dataset=test_ds)

Extracting features: 100%|██████████| 782/782 [14:36<00:00,  1.12s/it]
Extracting features: 100%|██████████| 157/157 [02:54<00:00,  1.11s/it]


# 5. Train classifier

In [16]:
# Linear-probe hyperparameters, gathered in one place so they are easy to tune.
# C is scikit-learn's inverse regularisation strength; random_state pins the
# solver's seed; verbose=1 makes the solver report progress.
clf_params = dict(
    C=0.316,
    max_iter=1500,
    verbose=1,
    n_jobs=-1,
    random_state=0,
)

# Fit the logistic-regression probe on the frozen CLIP features.
clf = LogisticRegression(**clf_params)
clf.fit(train_features, train_labels)

# Predicted class index for every test image.
preds = clf.predict(test_features)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.


# 6. Compute metrics (multi-class macro)

In [17]:
# Headline scores. Precision, recall and F1 are macro-averaged, i.e. computed
# per class and then averaged with equal weight per class.
macro_avg = {"average": "macro"}
accuracy = accuracy_score(test_labels, preds)
precision = precision_score(test_labels, preds, **macro_avg)
recall = recall_score(test_labels, preds, **macro_avg)  # sensitivity
f1 = f1_score(test_labels, preds, **macro_avg)

# ---- Specificity (macro) ----
# Confusion matrix: rows are true classes, columns are predicted classes.
cm = confusion_matrix(test_labels, preds)
true_pos = np.diag(cm)
predicted_per_class = cm.sum(axis=0)   # column sums
actual_per_class = cm.sum(axis=1)      # row sums

# For class k: FP = column k without the diagonal entry;
# TN = everything outside row k and column k.
false_pos = predicted_per_class - true_pos
true_neg = cm.sum() - actual_per_class - predicted_per_class + true_pos

# Per-class specificity TN / (TN + FP), then the unweighted mean over classes.
specificities = (true_neg / (true_neg + false_pos)).tolist()
specificity_macro = np.mean(specificities)

# 7. Print results

In [18]:
# Assemble the report first, then emit it with a single print call.
# All scores are shown to four decimal places.
report_lines = [
    "\n--- Evaluation Metrics ---",
    f"Accuracy:      {accuracy:.4f}",
    f"Precision:     {precision:.4f}",
    f"Recall/Sens:   {recall:.4f}",
    f"Specificity:   {specificity_macro:.4f}",
    f"F1 Score:      {f1:.4f}",
]
print("\n".join(report_lines))


--- Evaluation Metrics ---
Accuracy:      0.7558
Precision:     0.7617
Recall/Sens:   0.7558
Specificity:   0.9975
F1 Score:      0.7541
