In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from transformers import AutoModel
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import numpy as np
import matplotlib.pyplot as plt

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Normalization statistics for the model input.
# The DINOv2 checkpoint below is used frozen (eval mode, never fine-tuned), so
# its inputs should be normalized with the ImageNet statistics used by the
# checkpoint's own preprocessing, not with CIFAR-100 dataset statistics;
# otherwise the backbone sees inputs shifted away from what it was trained on.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

transform = transforms.Compose([
    transforms.Resize((224, 224)),  # resize every image to 224x224 before the backbone
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

train_dataset = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
# shuffle=False keeps dataset order, so rows of the extracted feature matrix
# line up with the indices of train_dataset.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=False, num_workers=2)

# Pretrained backbone, moved to the chosen device and switched to eval mode.
model = AutoModel.from_pretrained("facebook/dinov2-giant").to(device).eval()

def extract_features(loader, model, device=None):
    """Run ``model`` over every batch in ``loader`` and collect one vector per sample.

    For each batch the model output's ``last_hidden_state`` is read and only the
    vector at sequence position 0 is kept (for ViT-style backbones this is
    presumably the [CLS] token -- confirm for other architectures).
    Gradients are disabled during the pass; the model's train/eval mode is not
    changed here, so the caller is responsible for calling ``.eval()``.

    Args:
        loader: Iterable yielding ``(images, labels)`` pairs. Labels are ignored.
        model: Callable returning an object with a ``last_hidden_state`` tensor
            that can be indexed as ``[:, 0, :]``.
        device: Device each batch is moved to. ``None`` (default) uses the
            device of the model's first parameter, or the CPU when the model
            exposes no parameters, so the function no longer depends on a
            notebook-level ``device`` global.

    Returns:
        numpy.ndarray: Per-batch results concatenated along axis 0, in the
        order the loader produced them.

    Raises:
        ValueError: If ``loader`` yields no batches.
    """
    if device is None:
        # Follow the model instead of relying on hidden notebook state.
        params_fn = getattr(model, "parameters", None)
        first_param = next(iter(params_fn()), None) if callable(params_fn) else None
        device = first_param.device if first_param is not None else torch.device("cpu")

    features = []
    with torch.no_grad():
        for images, _ in loader:
            images = images.to(device)
            first_token = model(images).last_hidden_state[:, 0, :]
            # Move to host memory right away so results do not pile up on the device.
            features.append(first_token.cpu().numpy())

    if not features:
        # np.concatenate([]) fails with an unrelated-looking message; be explicit.
        raise ValueError("loader yielded no batches; nothing to extract")
    return np.concatenate(features)

# Embed every batch of the training loader with the backbone, then report the
# shape of the resulting array.
print("starting to extract features")
features100 = extract_features(loader=train_loader, model=model)
print(f"Extracted features shape: {features100.shape}")



In [None]:


def save_features(features, filename="features.npy"):
    """Write ``features`` to disk in NumPy ``.npy`` format and report where.

    ``np.save`` silently appends ``.npy`` to path-like targets that lack the
    extension, so the final name is resolved up front and the printed message
    always names the file that actually exists on disk.

    Args:
        features: Array-like data to store.
        filename: Target path (``str`` or ``os.PathLike``) or an open binary
            file object. Defaults to ``"features.npy"``.

    Returns:
        None
    """
    import os  # local import: only needed to normalise path-like targets

    if hasattr(filename, "write"):
        # Open file object: np.save writes to it as-is, nothing to normalise.
        target = filename
    else:
        target = os.fspath(filename)
        if not target.endswith(".npy"):
            target += ".npy"  # same rule np.save applies internally

    np.save(target, features)
    print(f"Features saved to {target}")

def load_features(filename="features.npy"):
    """Load a feature array previously stored with ``np.save`` and report its shape.

    ``np.save`` appends ``.npy`` to names that lack it, so a name written
    without the extension would not be found under that same name. When the
    given path does not exist and has no ``.npy`` suffix, ``<filename>.npy`` is
    loaded instead. Paths that exist, or already end in ``.npy``, are used as
    given.

    Args:
        filename: Path (``str`` or ``os.PathLike``) or an open binary file
            object. Defaults to ``"features.npy"``.

    Returns:
        The array returned by ``np.load``.

    Raises:
        FileNotFoundError: If no matching file exists.
    """
    import os  # local import: only needed to resolve path-like sources

    source = filename
    if not hasattr(filename, "read"):
        source = os.fspath(filename)
        if not source.endswith(".npy") and not os.path.exists(source):
            source += ".npy"  # mirror the name np.save would have written

    features = np.load(source)
    print(f"Features loaded from {source}, shape: {features.shape}")
    return features


# Write the extracted embeddings to disk.
# NOTE(review): features100 is computed from the train=True split, yet the
# file name says "test" -- confirm the intended split/name before other code
# relies on this file.
save_features(features100, "cifar100_dino2giant_test_features.npy")

