# DINOv2

In [1]:
import torch
from torchvision import datasets, transforms

# Run on CUDA when it is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else "cpu")

# Fetch the 'dinov2_vits14' entry from torch.hub, place it on `device`, use eval mode
model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14')
model = model.to(device)
model.eval()

# DINOv2 preprocessing: zero-pad the 32x32 images up to 224x224, then normalize
pad_px = (224 - 32) // 2  # 96 pixels on every side
dinov2_transform = transforms.Compose([
    transforms.Pad((pad_px, pad_px)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Raw-pixel view of CIFAR-10: conversion to a tensor only, no padding/normalization
original_transform = transforms.ToTensor()

# The same CIFAR-10 test split is opened twice, once per transform
cifar_test_args = dict(root='./data', train=False, download=True)
cifar_dinov2 = datasets.CIFAR10(transform=dinov2_transform, **cifar_test_args)
cifar_original = datasets.CIFAR10(transform=original_transform, **cifar_test_args)

# Both loaders share settings and do not shuffle, so sample order matches between them
loader_args = dict(batch_size=512, num_workers=4, pin_memory=True)
loader_dinov2 = torch.utils.data.DataLoader(cifar_dinov2, **loader_args)
loader_original = torch.utils.data.DataLoader(cifar_original, **loader_args)

# Forward every padded batch through the model without tracking gradients
embedding_batches, label_batches = [], []
with torch.no_grad():
    for images, targets in loader_dinov2:
        batch_features = model(images.to(device, non_blocking=True))
        embedding_batches.append(batch_features.cpu())  # keep results on the CPU
        label_batches.append(targets)

embeddings = torch.cat(embedding_batches)  # Shape: [10000, 384]
labels = torch.cat(label_batches)  # Shape: [10000]

# Gather the untouched 32x32 images in the same order
original_batches = []
for images, _ in loader_original:
    original_batches.append(images)

original_images = torch.cat(original_batches)  # Shape: [10000, 3, 32, 32]

# Persist everything in a single file (optional)
results = {
    'embeddings': embeddings,
    'labels': labels,
    'original_images': original_images,
}
torch.save(results, 'cifar10_dinov2_features_and_originals.pt')

# Report the tensor shapes as a quick sanity check
print("Shapes:")
print(f"Embeddings: {embeddings.shape}")  # [10000, 384]
print(f"Labels: {labels.shape}")  # [10000]
print(f"Original Images: {original_images.shape}")  # [10000, 3, 32, 32]

Using cache found in /root/.cache/torch/hub/facebookresearch_dinov2_main


Shapes:
Embeddings: torch.Size([10000, 384])
Labels: torch.Size([10000])
Original Images: torch.Size([10000, 3, 32, 32])


In [2]:
import numpy as np
from scipy.stats import pearsonr

def correlation_dissimilarity(emb1, emb2):
    """
    Second-order similarity between two representations of the same samples.

    For each feature space, builds the sample-by-sample dissimilarity matrix
    ``1 - np.corrcoef(emb)`` (rows are samples), takes its strict upper
    triangle, and returns the Pearson correlation between the two resulting
    vectors of pairwise dissimilarities.

    Parameters:
    emb1 (np.array) : embedding in one feature space, shape (n_samples, n_features_1)
    emb2 (np.array) : embedding in another feature space, shape (n_samples, n_features_2)

    Row ``i`` of ``emb1`` and row ``i`` of ``emb2`` must describe the same
    sample; the number of features may differ between the two spaces.

    Returns:
    Pearson correlation coefficient between the two dissimilarity structures.

    Raises:
    ValueError: if an input is not 2-D, if the inputs have different numbers
        of samples, or if there are fewer than 3 samples.
    """
    emb1 = np.asarray(emb1)
    emb2 = np.asarray(emb2)

    if emb1.ndim != 2 or emb2.ndim != 2:
        raise ValueError(
            "emb1 and emb2 must be 2-D (n_samples, n_features); "
            f"got shapes {emb1.shape} and {emb2.shape}"
        )

    # The upper-triangle indices below are derived from emb1 only. Without this
    # check a larger emb2 would be silently truncated to its first rows and a
    # smaller one would fail with an opaque IndexError.
    if emb1.shape[0] != emb2.shape[0]:
        raise ValueError(
            "emb1 and emb2 must contain the same number of samples; "
            f"got {emb1.shape[0]} and {emb2.shape[0]}"
        )

    # n samples give n*(n-1)/2 pairs; a correlation needs at least two of them.
    if emb1.shape[0] < 3:
        raise ValueError(
            f"need at least 3 samples to correlate pairwise dissimilarities; got {emb1.shape[0]}"
        )

    dissim1 = 1. - np.corrcoef(emb1)
    dissim2 = 1. - np.corrcoef(emb2)

    # Each unordered pair of samples is counted once; the diagonal is excluded.
    triu_indices = np.triu_indices_from(dissim1, k=1)
    flat1 = dissim1[triu_indices]
    flat2 = dissim2[triu_indices]

    # Compute second-order similarity (Pearson correlation); the p-value is unused
    r, _ = pearsonr(flat1, flat2)
    return r


from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

def train_linear_classifier(X, y, test_size=0.2, random_state=42, **kwargs):
    """
    Fit a Logistic Regression on a random train split and score it on the rest.

    Parameters:
    X (array-like): Feature matrix
    y (array-like): Target vector
    test_size (float): Fraction of the samples held out for evaluation (default: 0.2)
    random_state (int): Seed given to the train/test split (default: 42)
    **kwargs: Forwarded unchanged to the LogisticRegression constructor

    Returns:
    tuple: (fitted LogisticRegression, accuracy on the held-out split)
    """
    # Hold out `test_size` of the samples; the seed makes the split repeatable
    split = train_test_split(X, y, test_size=test_size, random_state=random_state)
    features_fit, features_eval, targets_fit, targets_eval = split

    # Build the classifier from the caller's options and fit it on the training part
    classifier = LogisticRegression(**kwargs).fit(features_fit, targets_fit)

    # Accuracy of the fitted classifier on the held-out part
    held_out_accuracy = accuracy_score(targets_eval, classifier.predict(features_eval))

    return classifier, held_out_accuracy

In [7]:
# Bring both representations into NumPy with one row per image
embeddings_np = embeddings.detach().cpu().numpy()
pixels_np = original_images.detach().cpu().numpy()
original_images_np = pixels_np.reshape(pixels_np.shape[0], -1)  # flatten each image to a vector

# Second-order similarity between DINOv2 features and raw pixels on the first 100 images
first_100 = slice(None, 100)
correlation_dissimilarity(embeddings_np[first_100], original_images_np[first_100])

np.float64(0.23679215246431365)