In [None]:
import os
# Set in the very first cell so the flag is already in the environment when
# `torch` is imported further down.
# NOTE(review): going by the variable name, this asks PyTorch to fall back to
# the CPU for operators the MPS backend lacks — confirm against the PyTorch docs.
os.environ.update(PYTORCH_ENABLE_MPS_FALLBACK="1")

In [None]:
import csv
import torch
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image

In [None]:
def _select_device():
    """Return the preferred torch device and announce the choice.

    Preference order is CUDA, then MPS, then CPU. The MPS check is only
    evaluated when CUDA is not available.
    """
    if torch.cuda.is_available():
        chosen = torch.device("cuda")
        print("Using GPU (CUDA)")
        return chosen

    if torch.backends.mps.is_available():
        chosen = torch.device("mps")
        print("Using MPS (Metal Performance Shaders)")
        return chosen

    chosen = torch.device("cpu")
    print("Using CPU")
    return chosen


device = _select_device()

# Sanity check: every later cell relies on `device` being set.
assert device is not None

In [None]:
# --- Configuration: everything a reader might want to tune -------------------

# Name passed to torch.hub.load; it is also embedded in the output folder name.
MODEL = 'dinov2_vitl14'
# Number of images per DataLoader batch.
BATCH_SIZE = 100

# Input locations.
DATA_DIR = "./data"
TRAIN_IMAGE_PATH = DATA_DIR + "/train_images"
TEST_IMAGE_PATH = DATA_DIR + "/test_images"

# Output locations: one CSV per split inside a folder named after MODEL.
_EMBEDDINGS_DIR = DATA_DIR + "/" + MODEL + "_data"
TEST_EMBEDDINGS_PATH = _EMBEDDINGS_DIR + "/test_embeddings.csv"
TRAIN_EMBEDDINGS_PATH = _EMBEDDINGS_DIR + "/train_embeddings.csv"



In [None]:
def loadImages(IMAGE_PATH):
    """Load every ``*.jpeg`` file in a directory as an RGB PIL image.

    The numeric image id comes from the file name: the text before the first
    ``.jpeg`` is parsed with ``int`` (``"12.jpeg"`` -> ``12``).

    Args:
        IMAGE_PATH: Directory to scan. Sub-directories are not searched.

    Returns:
        A list of ``(image_id, image)`` tuples sorted by ``image_id``. Every
        image is decoded eagerly, so the whole set is held in memory.

    Raises:
        ValueError: If a ``.jpeg`` file name does not parse as an integer id.
        OSError: If the directory or one of its images cannot be read.
    """
    # os.listdir returns entries in arbitrary order. Sorting by id keeps the
    # order (and therefore the rows written downstream) stable between runs.
    ids_and_names = sorted(
        (int(name.partition('.jpeg')[0]), name)
        for name in os.listdir(IMAGE_PATH)
        if name.endswith('.jpeg')
    )

    imageMap = []
    for image_id, name in ids_and_names:
        # convert() hands back a separate, fully loaded image, so the
        # underlying file can be closed as soon as the conversion is done.
        with Image.open(os.path.join(IMAGE_PATH, name)) as opened:
            imageMap.append((image_id, opened.convert("RGB")))
    return imageMap

# Eagerly read both splits into memory as lists of (image_id, RGB image) pairs.
trainIidsImgs = loadImages(IMAGE_PATH=TRAIN_IMAGE_PATH)
testIidsImgs = loadImages(IMAGE_PATH=TEST_IMAGE_PATH)

In [None]:
# Preprocessing applied to each PIL image before it reaches the model:
#   1. resize to a fixed 224x224 (the original aspect ratio is not kept),
#   2. convert the image to a tensor,
#   3. normalise each of the three channels with the mean / std below.
# NOTE(review): these values match the commonly used ImageNet channel
# statistics — confirm they are what the chosen checkpoint expects.
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]

_preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
]
image_transform = transforms.Compose(_preprocessing_steps)

class ImageIDDataset(Dataset):
    """Dataset over a sequence of ``(image_id, image)`` pairs.

    Indexing returns the id unchanged together with the image after it has
    been passed through the module-level ``image_transform``.
    """

    def __init__(self, iids):
        """Keep a reference to ``iids``, an indexable of ``(image_id, image)`` pairs."""
        self.iids = iids

    def __len__(self):
        """Number of stored pairs."""
        return len(self.iids)

    def __getitem__(self, idx):
        """Return ``(image_id, transformed_image)`` for position ``idx``."""
        sample_id, raw_image = self.iids[idx]
        # The transform runs on every access; nothing is cached.
        return sample_id, image_transform(raw_image)

# Shuffling is disabled for both loaders, so batches follow the order of the
# underlying (image_id, image) lists.
trainDataLoader = DataLoader(
    dataset=ImageIDDataset(trainIidsImgs),
    batch_size=BATCH_SIZE,
    shuffle=False,
)
testDataLoader = DataLoader(
    dataset=ImageIDDataset(testIidsImgs),
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [None]:
# Fetch the backbone named by MODEL from the facebookresearch/dinov2 hub repo.
model = torch.hub.load(repo_or_dir='facebookresearch/dinov2', model=MODEL)

# Move the weights to the selected device and switch to evaluation mode.
# Both calls return the module itself, so they can be chained.
model.to(device).eval()
print("Got model!")

In [None]:
# Feature width, read from the first entry of the `normalized_shape` of the
# model's `norm` layer. It determines how many f* columns the CSV header gets.
_norm_shape = model.norm.normalized_shape
output_size = _norm_shape[0]
output_size

In [None]:

# Run every image through the model and write one CSV per split.
# Row layout: img_id, f0, f1, ..., f{output_size - 1}.
header = ['img_id'] + [f'f{i}' for i in range(output_size)]

for loader, path in [(trainDataLoader, TRAIN_EMBEDDINGS_PATH), (testDataLoader, TEST_EMBEDDINGS_PATH)]:
    # The per-model output folder is not created anywhere else in the notebook.
    os.makedirs(os.path.dirname(path) or '.', exist_ok=True)

    # Mode 'w' rather than 'a': the header is written unconditionally, so
    # appending on a re-run would leave a second header and duplicate rows.
    # newline='' is what the csv module asks for, so that it controls line
    # endings itself (otherwise some platforms emit blank lines between rows).
    with open(path, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(header)

        for i, (img_ids, images) in enumerate(loader):
            print(i, len(loader))

            # Inference only, so gradient tracking is switched off.
            with torch.no_grad():
                features = model(images.to(device))
            features = features.to('cpu')

            # One row per image: the id followed by its feature values.
            # tolist() turns the batch tensors into plain Python numbers.
            writer.writerows(
                [img_id] + feature
                for img_id, feature in zip(img_ids.tolist(), features.tolist())
            )
            